HBASE-4676 Prefix Compression - Trie data block encoding (Matt Corgan)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1443289 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2013-02-07 00:36:24 +00:00
parent f17165ae08
commit b198a50434
93 changed files with 10512 additions and 52 deletions

View File

@ -38,7 +38,10 @@ public enum DataBlockEncoding {
// id 1 is reserved for the BITSET algorithm to be added later
PREFIX(2, createEncoder("org.apache.hadoop.hbase.io.encoding.PrefixKeyDeltaEncoder")),
DIFF(3, createEncoder("org.apache.hadoop.hbase.io.encoding.DiffKeyDeltaEncoder")),
FAST_DIFF(4, createEncoder("org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder"));
FAST_DIFF(4, createEncoder("org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder")),
// id 5 is reserved for the COPY_KEY algorithm for benchmarking
// COPY_KEY(5, createEncoder("org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder")),
PREFIX_TREE(6, createEncoder("org.apache.hbase.codec.prefixtree.PrefixTreeCodec"));
private final short id;
private final byte[] idInBytes;

View File

@ -18,6 +18,8 @@
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
@ -50,4 +52,14 @@ public class ByteRangeTool {
return ranges;
}
/**
 * Writes the bytes referenced by the whole range to the stream.
 */
public static void write(OutputStream os, ByteRange byteRange) throws IOException {
  // Identical to writing getBytes() from getOffset() for getLength() bytes;
  // delegate to the inner-offset variant with an offset of zero.
  write(os, byteRange, 0);
}
/**
 * Writes the bytes referenced by the range to the stream, skipping the first
 * byteRangeInnerOffset bytes inside the range.
 */
public static void write(OutputStream os, ByteRange byteRange, int byteRangeInnerOffset)
    throws IOException {
  int startIndex = byteRange.getOffset() + byteRangeInnerOffset;
  int numBytes = byteRange.getLength() - byteRangeInnerOffset;
  os.write(byteRange.getBytes(), startIndex, numBytes);
}
}

View File

@ -27,8 +27,10 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -43,6 +45,7 @@ import org.apache.hadoop.io.WritableUtils;
import sun.misc.Unsafe;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
/**
* Utility class that handles byte arrays, conversions to/from other types,
@ -1718,4 +1721,44 @@ public class Bytes {
return out;
}
/**
 * Null-safe, element-wise equality of two lists of byte arrays.  Two null
 * lists are equal; a null list never equals a non-null one.
 */
public static boolean equals(List<byte[]> a, List<byte[]> b) {
  if (a == null || b == null) {
    return a == b; // equal only when both references are null
  }
  int size = a.size();
  if (size != b.size()) {
    return false;
  }
  for (int i = 0; i < size; ++i) {
    if (!Bytes.equals(a.get(i), b.get(i))) {
      return false;
    }
  }
  return true;
}
/**
 * Checks that the arrays are in non-decreasing lexicographic order.  A null
 * or empty collection is treated as sorted (via IterableUtils.nullSafe).
 */
public static boolean isSorted(Collection<byte[]> arrays) {
  byte[] prev = new byte[0]; // the empty array sorts before everything
  for (byte[] current : IterableUtils.nullSafe(arrays)) {
    if (Bytes.compareTo(prev, current) > 0) {
      return false;
    }
    prev = current;
  }
  return true;
}
/**
 * Converts each string to its UTF-8 byte representation, preserving order.
 * A null input yields an empty list (via the null-safe helpers).
 */
public static List<byte[]> getUtf8ByteArrays(List<String> strings) {
  int expectedSize = CollectionUtils.nullSafeSize(strings);
  List<byte[]> encoded = Lists.newArrayListWithCapacity(expectedSize);
  for (String string : IterableUtils.nullSafe(strings)) {
    encoded.add(Bytes.toBytes(string));
  }
  return encoded;
}
}

View File

@ -28,12 +28,15 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.io.WritableUtils;
import com.google.common.primitives.Bytes;
/**
* Generate list of key values which are very useful to test data block encoding
* and compression.
*/
public class RedundantKVGenerator {
// row settings
static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
@ -107,6 +110,7 @@ public class RedundantKVGenerator {
) {
this.randomizer = randomizer;
this.commonPrefix = DEFAULT_COMMON_PREFIX;
this.numberOfRowPrefixes = numberOfRowPrefixes;
this.averagePrefixLength = averagePrefixLength;
this.prefixLengthVariance = prefixLengthVariance;
@ -115,7 +119,7 @@ public class RedundantKVGenerator {
this.numberOfRows = numberOfRows;
this.chanceForSameQualifier = chanceForSameQualifier;
this.chanceForSimiliarQualifier = chanceForSimiliarQualifier;
this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
this.averageQualifierLength = averageQualifierLength;
this.qualifierLengthVariance = qualifierLengthVariance;
@ -131,6 +135,7 @@ public class RedundantKVGenerator {
private Random randomizer;
// row settings
private byte[] commonPrefix;//global prefix before rowPrefixes
private int numberOfRowPrefixes;
private int averagePrefixLength = 6;
private int prefixLengthVariance = 3;
@ -138,9 +143,12 @@ public class RedundantKVGenerator {
private int suffixLengthVariance = 3;
private int numberOfRows = 500;
//family
private byte[] family;
// qualifier
private float chanceForSameQualifier = 0.5f;
private float chanceForSimiliarQualifier = 0.4f;
private float chanceForSimilarQualifier = 0.4f;
private int averageQualifierLength = 9;
private int qualifierLengthVariance = 3;
@ -161,7 +169,8 @@ public class RedundantKVGenerator {
prefixLengthVariance;
byte[] newPrefix = new byte[prefixLength];
randomizer.nextBytes(newPrefix);
prefixes.add(newPrefix);
byte[] newPrefixWithCommon = newPrefix;
prefixes.add(newPrefixWithCommon);
}
// generate rest of the row
@ -173,7 +182,8 @@ public class RedundantKVGenerator {
int randomPrefix = randomizer.nextInt(prefixes.size());
byte[] row = new byte[prefixes.get(randomPrefix).length +
suffixLength];
rows.add(row);
byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
rows.add(rowWithCommonPrefix);
}
return rows;
@ -188,20 +198,22 @@ public class RedundantKVGenerator {
List<KeyValue> result = new ArrayList<KeyValue>();
List<byte[]> rows = generateRows();
Map<Integer, List<byte[]>> rowsToQualifier =
new HashMap<Integer, List<byte[]>>();
Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
byte[] family = new byte[columnFamilyLength];
randomizer.nextBytes(family);
if(family==null){
family = new byte[columnFamilyLength];
randomizer.nextBytes(family);
}
long baseTimestamp = Math.abs(randomizer.nextLong()) /
baseTimestampDivide;
long baseTimestamp = Math.abs(randomizer.nextLong()) / baseTimestampDivide;
byte[] value = new byte[valueLength];
for (int i = 0; i < howMany; ++i) {
long timestamp = baseTimestamp + randomizer.nextInt(
timestampDiffSize);
long timestamp = baseTimestamp;
if(timestampDiffSize > 0){
timestamp += randomizer.nextInt(timestampDiffSize);
}
Integer rowId = randomizer.nextInt(rows.size());
byte[] row = rows.get(rowId);
@ -209,13 +221,11 @@ public class RedundantKVGenerator {
// occasionally completely different
byte[] qualifier;
float qualifierChance = randomizer.nextFloat();
if (!rowsToQualifier.containsKey(rowId) ||
qualifierChance > chanceForSameQualifier +
chanceForSimiliarQualifier) {
if (!rowsToQualifier.containsKey(rowId)
|| qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
int qualifierLength = averageQualifierLength;
qualifierLength +=
randomizer.nextInt(2 * qualifierLengthVariance + 1) -
qualifierLengthVariance;
qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
- qualifierLengthVariance;
qualifier = new byte[qualifierLength];
randomizer.nextBytes(qualifier);
@ -227,8 +237,8 @@ public class RedundantKVGenerator {
} else if (qualifierChance > chanceForSameQualifier) {
// similar qualifier
List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
byte[] originalQualifier = previousQualifiers.get(
randomizer.nextInt(previousQualifiers.size()));
byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
.size()));
qualifier = new byte[originalQualifier.length];
int commonPrefix = randomizer.nextInt(qualifier.length);
@ -241,8 +251,7 @@ public class RedundantKVGenerator {
} else {
// same qualifier
List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
qualifier = previousQualifiers.get(
randomizer.nextInt(previousQualifiers.size()));
qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
}
if (randomizer.nextFloat() < chanceForZeroValue) {
@ -286,5 +295,99 @@ public class RedundantKVGenerator {
return result;
}
/************************ get/set ***********************************/
// Fluent setters: each returns this so configuration calls can be chained.

/** Global prefix prepended to every generated row key. */
public RedundantKVGenerator setCommonPrefix(byte[] prefix){
this.commonPrefix = prefix;
return this;
}

/** Source of randomness for all generated data. */
public RedundantKVGenerator setRandomizer(Random randomizer) {
this.randomizer = randomizer;
return this;
}

/** Number of distinct row-key prefixes to generate. */
public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
this.numberOfRowPrefixes = numberOfRowPrefixes;
return this;
}

/** Mean length of a generated row-key prefix, in bytes. */
public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
this.averagePrefixLength = averagePrefixLength;
return this;
}

/** Spread (+/-) around the average prefix length. */
public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
this.prefixLengthVariance = prefixLengthVariance;
return this;
}

/** Mean length of the row-key suffix appended after a prefix, in bytes. */
public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
this.averageSuffixLength = averageSuffixLength;
return this;
}

/** Spread (+/-) around the average suffix length. */
public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
this.suffixLengthVariance = suffixLengthVariance;
return this;
}

/** Total number of distinct rows to generate. */
public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
this.numberOfRows = numberOfRows;
return this;
}

/** Probability that a KeyValue reuses an existing qualifier for its row. */
public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
this.chanceForSameQualifier = chanceForSameQualifier;
return this;
}
/**
 * Probability that a KeyValue uses a qualifier sharing a prefix with an
 * existing qualifier for the same row.
 * Fix: parameter name carried the "Similiar" typo; renamed to match the
 * method and field (parameter names are not part of Java call sites, so
 * this is fully backward compatible).
 * @param chanceForSimilarQualifier probability in [0, 1]
 * @return this, for chaining
 */
public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimilarQualifier) {
  this.chanceForSimilarQualifier = chanceForSimilarQualifier;
  return this;
}
/** Mean length of a newly generated qualifier, in bytes. */
public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
this.averageQualifierLength = averageQualifierLength;
return this;
}

/** Spread (+/-) around the average qualifier length. */
public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
this.qualifierLengthVariance = qualifierLengthVariance;
return this;
}

/** Length of the randomly generated column family, in bytes. */
public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
this.columnFamilyLength = columnFamilyLength;
return this;
}

/**
 * Use an explicit family instead of a random one.  Note the side effect:
 * columnFamilyLength is overwritten with the supplied array's length.
 */
public RedundantKVGenerator setFamily(byte[] family) {
this.family = family;
this.columnFamilyLength = family.length;
return this;
}

/** Length of each generated value, in bytes. */
public RedundantKVGenerator setValueLength(int valueLength) {
this.valueLength = valueLength;
return this;
}

/** Probability that a KeyValue gets an all-zero value. */
public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
this.chanceForZeroValue = chanceForZeroValue;
return this;
}

/** Divisor applied to the random base timestamp (larger = smaller base). */
public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
this.baseTimestampDivide = baseTimestampDivide;
return this;
}

/** Max random delta added to the base timestamp per KeyValue. */
public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
this.timestampDiffSize = timestampDiffSize;
return this;
}
}

View File

@ -79,22 +79,39 @@ public class CellComparator implements Comparator<Cell>, Serializable{
/**************** equals ****************************/
public static boolean equals(Cell a, Cell b){
if (!areKeyLengthsEqual(a, b)) {
return false;
}
//TODO compare byte[]'s in reverse since later bytes more likely to differ
return 0 == compareStatic(a, b);
return equalsRow(a, b)
&& equalsFamily(a, b)
&& equalsQualifier(a, b)
&& equalsTimestamp(a, b)
&& equalsType(a, b);
}
public static boolean equalsRow(Cell a, Cell b){
if(!areRowLengthsEqual(a, b)){
return false;
}
return 0 == Bytes.compareTo(
return Bytes.equals(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
}
/** Byte-wise equality of the two cells' family arrays. */
public static boolean equalsFamily(Cell a, Cell b){
return Bytes.equals(
a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
}

/** Byte-wise equality of the two cells' qualifier arrays. */
public static boolean equalsQualifier(Cell a, Cell b){
return Bytes.equals(
a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
}

/** Equality of the two cells' timestamps. */
public static boolean equalsTimestamp(Cell a, Cell b){
return a.getTimestamp() == b.getTimestamp();
}

/** Equality of the two cells' KeyValue type bytes (Put, Delete, ...). */
public static boolean equalsType(Cell a, Cell b){
return a.getTypeByte() == b.getTypeByte();
}
/********************* hashCode ************************/

View File

@ -18,6 +18,8 @@
package org.apache.hbase.cell;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hbase.Cell;
@ -45,6 +47,6 @@ public interface CellOutputStream {
* that can then be read from the implementation to be sent to disk, put in the block cache, or
* sent over the network.
*/
void flush();
void flush() throws IOException;
}

62
hbase-prefix-tree/pom.xml Normal file
View File

@ -0,0 +1,62 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>hbase</artifactId>
<groupId>org.apache.hbase</groupId>
<version>0.95-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<artifactId>hbase-prefix-tree</artifactId>
<name>HBase - Prefix Tree</name>
<description>Prefix Tree Data Block Encoder</description>
<build>
<plugins>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<!-- Always skip the second part executions, since we only run
simple unit tests in this module. -->
<executions>
<execution>
<id>secondPartTestsExecution</id>
<phase>test</phase>
<goals>
<goal>test</goal>
</goals>
<configuration>
<skip>true</skip>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,841 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.apache.hbase.util.vint.UVIntTool;
import org.apache.hbase.util.vint.UVLongTool;
/**
* Information about the block. Stored at the beginning of the byte[]. Contains things
* like minimum timestamp and width of FInts in the row tree.
*
* Most fields stored in VInts that get decoded on the first access of each new block.
*/
@InterfaceAudience.Private
public class PrefixTreeBlockMeta {
/******************* static fields ********************/
public static final int VERSION = 0;
public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue
public static final int
NUM_LONGS = 2,
NUM_INTS = 22,
NUM_SHORTS = 0,//keyValueTypeWidth not persisted
NUM_SINGLE_BYTES = 2,
MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS
+ Bytes.SIZEOF_SHORT * NUM_SHORTS
+ Bytes.SIZEOF_INT * NUM_INTS
+ NUM_SINGLE_BYTES;
/**************** transient fields *********************/
protected int arrayOffset;
protected int bufferOffset;
/**************** persisted fields **********************/
// PrefixTree version to allow future format modifications
protected int version;
protected int numMetaBytes;
protected int numKeyValueBytes;
protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte
// split the byte[] into 6 sections for the different data types
protected int numRowBytes;
protected int numFamilyBytes;
protected int numQualifierBytes;
protected int numTimestampBytes;
protected int numMvccVersionBytes;
protected int numValueBytes;
// number of bytes in each section of fixed width FInts
protected int nextNodeOffsetWidth;
protected int familyOffsetWidth;
protected int qualifierOffsetWidth;
protected int timestampIndexWidth;
protected int mvccVersionIndexWidth;
protected int valueOffsetWidth;
protected int valueLengthWidth;
// used to pre-allocate structures for reading
protected int rowTreeDepth;
protected int maxRowLength;
protected int maxQualifierLength;
// the timestamp from which the deltas are calculated
protected long minTimestamp;
protected int timestampDeltaWidth;
protected long minMvccVersion;
protected int mvccVersionDeltaWidth;
protected boolean allSameType;
protected byte allTypes;
protected int numUniqueRows;
protected int numUniqueFamilies;
protected int numUniqueQualifiers;
/***************** constructors ********************/
/** No-arg constructor; fields are expected to be populated via setters. */
public PrefixTreeBlockMeta() {
}

/**
 * Constructs by decoding all persisted fields from the stream.
 * NOTE(review): version is initialized to VERSION here but is then
 * overwritten by the value read inside readVariableBytesFromInputStream.
 */
public PrefixTreeBlockMeta(InputStream is) throws IOException{
this.version = VERSION;
this.arrayOffset = 0;
this.bufferOffset = 0;
readVariableBytesFromInputStream(is);
}

/**
 * @param buffer positioned at start of PtBlockMeta
 */
public PrefixTreeBlockMeta(ByteBuffer buffer) {
initOnBlock(buffer);
}

/**
 * Decodes fields directly from the buffer's backing array at its current
 * position.  NOTE(review): buffer.array() requires an array-backed (heap)
 * ByteBuffer; a direct buffer would throw UnsupportedOperationException.
 */
public void initOnBlock(ByteBuffer buffer) {
arrayOffset = buffer.arrayOffset();
bufferOffset = buffer.position();
readVariableBytesFromArray(buffer.array(), arrayOffset + bufferOffset);
}
/**************** operate on each field **********************/
/**
 * Computes the number of bytes the meta section will occupy when serialized,
 * mirroring field-for-field the order used by
 * {@link #writeVariableBytesToOutputStream(OutputStream)}.
 * Fix: numMetaBytes was sized with UVLongTool while it is written and read
 * as a UVInt (see writeVariableBytesToOutputStream and
 * readVariableBytesFromInputStream) — use UVIntTool so the size calculation
 * matches the serialized format.
 * @return total serialized size of this block meta, in bytes
 */
public int calculateNumMetaBytes(){
  int numBytes = 0;
  numBytes += UVIntTool.numBytes(version);
  // written/read as a UVInt, so size it with UVIntTool as well
  numBytes += UVIntTool.numBytes(numMetaBytes);
  numBytes += UVIntTool.numBytes(numKeyValueBytes);
  ++numBytes;//os.write(getIncludesMvccVersion());
  numBytes += UVIntTool.numBytes(numRowBytes);
  numBytes += UVIntTool.numBytes(numFamilyBytes);
  numBytes += UVIntTool.numBytes(numQualifierBytes);
  numBytes += UVIntTool.numBytes(numTimestampBytes);
  numBytes += UVIntTool.numBytes(numMvccVersionBytes);
  numBytes += UVIntTool.numBytes(numValueBytes);
  numBytes += UVIntTool.numBytes(nextNodeOffsetWidth);
  numBytes += UVIntTool.numBytes(familyOffsetWidth);
  numBytes += UVIntTool.numBytes(qualifierOffsetWidth);
  numBytes += UVIntTool.numBytes(timestampIndexWidth);
  numBytes += UVIntTool.numBytes(mvccVersionIndexWidth);
  numBytes += UVIntTool.numBytes(valueOffsetWidth);
  numBytes += UVIntTool.numBytes(valueLengthWidth);
  numBytes += UVIntTool.numBytes(rowTreeDepth);
  numBytes += UVIntTool.numBytes(maxRowLength);
  numBytes += UVIntTool.numBytes(maxQualifierLength);
  numBytes += UVLongTool.numBytes(minTimestamp);
  numBytes += UVIntTool.numBytes(timestampDeltaWidth);
  numBytes += UVLongTool.numBytes(minMvccVersion);
  numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth);
  ++numBytes;//os.write(getAllSameTypeByte());
  ++numBytes;//os.write(allTypes);
  numBytes += UVIntTool.numBytes(numUniqueRows);
  numBytes += UVIntTool.numBytes(numUniqueFamilies);
  numBytes += UVIntTool.numBytes(numUniqueQualifiers);
  return numBytes;
}
/**
 * Serializes every persisted field to the stream.  The field order here
 * defines the on-disk format and must be kept in lock-step with
 * readVariableBytesFromInputStream, readVariableBytesFromArray and
 * calculateNumMetaBytes.
 */
public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{
UVIntTool.writeBytes(version, os);
UVIntTool.writeBytes(numMetaBytes, os);
UVIntTool.writeBytes(numKeyValueBytes, os);
os.write(getIncludesMvccVersionByte());

UVIntTool.writeBytes(numRowBytes, os);
UVIntTool.writeBytes(numFamilyBytes, os);
UVIntTool.writeBytes(numQualifierBytes, os);
UVIntTool.writeBytes(numTimestampBytes, os);
UVIntTool.writeBytes(numMvccVersionBytes, os);
UVIntTool.writeBytes(numValueBytes, os);

UVIntTool.writeBytes(nextNodeOffsetWidth, os);
UVIntTool.writeBytes(familyOffsetWidth, os);
UVIntTool.writeBytes(qualifierOffsetWidth, os);
UVIntTool.writeBytes(timestampIndexWidth, os);
UVIntTool.writeBytes(mvccVersionIndexWidth, os);
UVIntTool.writeBytes(valueOffsetWidth, os);
UVIntTool.writeBytes(valueLengthWidth, os);

UVIntTool.writeBytes(rowTreeDepth, os);
UVIntTool.writeBytes(maxRowLength, os);
UVIntTool.writeBytes(maxQualifierLength, os);

// longs use UVLong; flags and allTypes are single raw bytes
UVLongTool.writeBytes(minTimestamp, os);
UVIntTool.writeBytes(timestampDeltaWidth, os);
UVLongTool.writeBytes(minMvccVersion, os);
UVIntTool.writeBytes(mvccVersionDeltaWidth, os);
os.write(getAllSameTypeByte());
os.write(allTypes);

UVIntTool.writeBytes(numUniqueRows, os);
UVIntTool.writeBytes(numUniqueFamilies, os);
UVIntTool.writeBytes(numUniqueQualifiers, os);
}
/**
 * Deserializes all persisted fields from the stream, in exactly the order
 * they were written by {@link #writeVariableBytesToOutputStream(OutputStream)}.
 * Fix: the raw single-byte fields were read via {@code (byte) is.read()},
 * which silently turns the EOF sentinel (-1) into the byte 0xFF on a
 * truncated stream; now an IOException is thrown instead.
 * @throws IOException if the stream ends before all fields are read
 */
public void readVariableBytesFromInputStream(InputStream is) throws IOException{
  version = UVIntTool.getInt(is);
  numMetaBytes = UVIntTool.getInt(is);
  numKeyValueBytes = UVIntTool.getInt(is);
  setIncludesMvccVersion(readByteOrThrow(is));

  numRowBytes = UVIntTool.getInt(is);
  numFamilyBytes = UVIntTool.getInt(is);
  numQualifierBytes = UVIntTool.getInt(is);
  numTimestampBytes = UVIntTool.getInt(is);
  numMvccVersionBytes = UVIntTool.getInt(is);
  numValueBytes = UVIntTool.getInt(is);

  nextNodeOffsetWidth = UVIntTool.getInt(is);
  familyOffsetWidth = UVIntTool.getInt(is);
  qualifierOffsetWidth = UVIntTool.getInt(is);
  timestampIndexWidth = UVIntTool.getInt(is);
  mvccVersionIndexWidth = UVIntTool.getInt(is);
  valueOffsetWidth = UVIntTool.getInt(is);
  valueLengthWidth = UVIntTool.getInt(is);

  rowTreeDepth = UVIntTool.getInt(is);
  maxRowLength = UVIntTool.getInt(is);
  maxQualifierLength = UVIntTool.getInt(is);

  minTimestamp = UVLongTool.getLong(is);
  timestampDeltaWidth = UVIntTool.getInt(is);
  minMvccVersion = UVLongTool.getLong(is);
  mvccVersionDeltaWidth = UVIntTool.getInt(is);
  setAllSameType(readByteOrThrow(is));
  allTypes = readByteOrThrow(is);

  numUniqueRows = UVIntTool.getInt(is);
  numUniqueFamilies = UVIntTool.getInt(is);
  numUniqueQualifiers = UVIntTool.getInt(is);
}

/** Reads one byte, failing loudly on end-of-stream instead of returning -1 as data. */
private static byte readByteOrThrow(InputStream is) throws IOException {
  int b = is.read();
  if (b < 0) {
    throw new IOException("Stream ended prematurely while reading PrefixTreeBlockMeta");
  }
  return (byte) b;
}
/**
 * Deserializes all persisted fields directly from a byte array, advancing a
 * local cursor by the encoded width of each field.  Field order must match
 * writeVariableBytesToOutputStream exactly.
 * @param bytes array containing the serialized meta
 * @param offset absolute index in {@code bytes} where the meta begins
 */
public void readVariableBytesFromArray(byte[] bytes, int offset) {
int position = offset;

version = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(version);
numMetaBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numMetaBytes);
numKeyValueBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numKeyValueBytes);
// single raw byte for the mvcc flag
setIncludesMvccVersion(bytes[position]);
++position;

numRowBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numRowBytes);
numFamilyBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numFamilyBytes);
numQualifierBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numQualifierBytes);
numTimestampBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numTimestampBytes);
numMvccVersionBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numMvccVersionBytes);
numValueBytes = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numValueBytes);

nextNodeOffsetWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(nextNodeOffsetWidth);
familyOffsetWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(familyOffsetWidth);
qualifierOffsetWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(qualifierOffsetWidth);
timestampIndexWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(timestampIndexWidth);
mvccVersionIndexWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(mvccVersionIndexWidth);
valueOffsetWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(valueOffsetWidth);
valueLengthWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(valueLengthWidth);

rowTreeDepth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(rowTreeDepth);
maxRowLength = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(maxRowLength);
maxQualifierLength = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(maxQualifierLength);

minTimestamp = UVLongTool.getLong(bytes, position);
position += UVLongTool.numBytes(minTimestamp);
timestampDeltaWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(timestampDeltaWidth);
minMvccVersion = UVLongTool.getLong(bytes, position);
position += UVLongTool.numBytes(minMvccVersion);
mvccVersionDeltaWidth = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(mvccVersionDeltaWidth);
// two single raw bytes: the all-same-type flag, then the shared type value
setAllSameType(bytes[position]);
++position;
allTypes = bytes[position];
++position;

numUniqueRows = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numUniqueRows);
numUniqueFamilies = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numUniqueFamilies);
numUniqueQualifiers = UVIntTool.getInt(bytes, position);
position += UVIntTool.numBytes(numUniqueQualifiers);
}
//TODO method that can read directly from ByteBuffer instead of InputStream
/*************** methods *************************/
/** Width in bytes of each per-cell type field: 0 when all cells share one type. */
public int getKeyValueTypeWidth() {
return allSameType ? 0 : 1;
}

/** Encodes the includesMvccVersion flag as a single byte (1 = true, 0 = false). */
public byte getIncludesMvccVersionByte() {
return includesMvccVersion ? (byte) 1 : (byte) 0;
}

/** Decodes the flag: any non-zero byte means true. */
public void setIncludesMvccVersion(byte includesMvccVersionByte) {
includesMvccVersion = includesMvccVersionByte != 0;
}

/** Encodes the allSameType flag as a single byte (1 = true, 0 = false). */
public byte getAllSameTypeByte() {
return allSameType ? (byte) 1 : (byte) 0;
}

/** Decodes the flag: any non-zero byte means true. */
public void setAllSameType(byte allSameTypeByte) {
allSameType = allSameTypeByte != 0;
}

/** A zero-width timestamp index means every cell shares a single timestamp. */
public boolean isAllSameTimestamp() {
return timestampIndexWidth == 0;
}

/** A zero-width mvcc index means every cell shares a single mvcc version. */
public boolean isAllSameMvccVersion() {
return mvccVersionIndexWidth == 0;
}

/** Copies the timestamp stats computed by the encoder into this meta. */
public void setTimestampFields(LongEncoder encoder){
this.minTimestamp = encoder.getMin();
this.timestampIndexWidth = encoder.getBytesPerIndex();
this.timestampDeltaWidth = encoder.getBytesPerDelta();
this.numTimestampBytes = encoder.getTotalCompressedBytes();
}

/** Copies the mvcc-version stats computed by the encoder into this meta. */
public void setMvccVersionFields(LongEncoder encoder){
this.minMvccVersion = encoder.getMin();
this.mvccVersionIndexWidth = encoder.getBytesPerIndex();
this.mvccVersionDeltaWidth = encoder.getBytesPerDelta();
this.numMvccVersionBytes = encoder.getTotalCompressedBytes();
}
/*************** Object methods *************************/
/**
 * Field-by-field equality over every transient and persisted field.
 * Note that the transient arrayOffset/bufferOffset are included, so two
 * metas decoded from different buffer positions compare unequal.
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (obj == null || getClass() != obj.getClass()) {
    return false;
  }
  PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj;
  return allSameType == other.allSameType
      && allTypes == other.allTypes
      && arrayOffset == other.arrayOffset
      && bufferOffset == other.bufferOffset
      && valueLengthWidth == other.valueLengthWidth
      && valueOffsetWidth == other.valueOffsetWidth
      && familyOffsetWidth == other.familyOffsetWidth
      && includesMvccVersion == other.includesMvccVersion
      && maxQualifierLength == other.maxQualifierLength
      && maxRowLength == other.maxRowLength
      && mvccVersionDeltaWidth == other.mvccVersionDeltaWidth
      && mvccVersionIndexWidth == other.mvccVersionIndexWidth
      && minMvccVersion == other.minMvccVersion
      && minTimestamp == other.minTimestamp
      && nextNodeOffsetWidth == other.nextNodeOffsetWidth
      && numValueBytes == other.numValueBytes
      && numFamilyBytes == other.numFamilyBytes
      && numMvccVersionBytes == other.numMvccVersionBytes
      && numMetaBytes == other.numMetaBytes
      && numQualifierBytes == other.numQualifierBytes
      && numRowBytes == other.numRowBytes
      && numTimestampBytes == other.numTimestampBytes
      && numUniqueFamilies == other.numUniqueFamilies
      && numUniqueQualifiers == other.numUniqueQualifiers
      && numUniqueRows == other.numUniqueRows
      && numKeyValueBytes == other.numKeyValueBytes
      && qualifierOffsetWidth == other.qualifierOffsetWidth
      && rowTreeDepth == other.rowTreeDepth
      && timestampDeltaWidth == other.timestampDeltaWidth
      && timestampIndexWidth == other.timestampIndexWidth
      && version == other.version;
}
/**
 * Eclipse-generated hashCode covering the same fields as
 * {@link #equals(Object)}, keeping the two methods consistent.
 */
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + (allSameType ? 1231 : 1237);
result = prime * result + allTypes;
result = prime * result + arrayOffset;
result = prime * result + bufferOffset;
result = prime * result + valueLengthWidth;
result = prime * result + valueOffsetWidth;
result = prime * result + familyOffsetWidth;
result = prime * result + (includesMvccVersion ? 1231 : 1237);
result = prime * result + maxQualifierLength;
result = prime * result + maxRowLength;
result = prime * result + mvccVersionDeltaWidth;
result = prime * result + mvccVersionIndexWidth;
// fold longs to ints via xor of their halves (standard generated pattern)
result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32));
result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32));
result = prime * result + nextNodeOffsetWidth;
result = prime * result + numValueBytes;
result = prime * result + numFamilyBytes;
result = prime * result + numMvccVersionBytes;
result = prime * result + numMetaBytes;
result = prime * result + numQualifierBytes;
result = prime * result + numRowBytes;
result = prime * result + numTimestampBytes;
result = prime * result + numUniqueFamilies;
result = prime * result + numUniqueQualifiers;
result = prime * result + numUniqueRows;
result = prime * result + numKeyValueBytes;
result = prime * result + qualifierOffsetWidth;
result = prime * result + rowTreeDepth;
result = prime * result + timestampDeltaWidth;
result = prime * result + timestampIndexWidth;
result = prime * result + version;
return result;
}
/**
 * Renders every field as "PtBlockMeta [name=value, ...]"; output is
 * byte-identical to the previous StringBuilder-based implementation.
 */
@Override
public String toString() {
  return "PtBlockMeta [arrayOffset=" + arrayOffset
      + ", bufferOffset=" + bufferOffset
      + ", version=" + version
      + ", numMetaBytes=" + numMetaBytes
      + ", numKeyValueBytes=" + numKeyValueBytes
      + ", includesMvccVersion=" + includesMvccVersion
      + ", numRowBytes=" + numRowBytes
      + ", numFamilyBytes=" + numFamilyBytes
      + ", numQualifierBytes=" + numQualifierBytes
      + ", numTimestampBytes=" + numTimestampBytes
      + ", numMvccVersionBytes=" + numMvccVersionBytes
      + ", numValueBytes=" + numValueBytes
      + ", nextNodeOffsetWidth=" + nextNodeOffsetWidth
      + ", familyOffsetWidth=" + familyOffsetWidth
      + ", qualifierOffsetWidth=" + qualifierOffsetWidth
      + ", timestampIndexWidth=" + timestampIndexWidth
      + ", mvccVersionIndexWidth=" + mvccVersionIndexWidth
      + ", valueOffsetWidth=" + valueOffsetWidth
      + ", valueLengthWidth=" + valueLengthWidth
      + ", rowTreeDepth=" + rowTreeDepth
      + ", maxRowLength=" + maxRowLength
      + ", maxQualifierLength=" + maxQualifierLength
      + ", minTimestamp=" + minTimestamp
      + ", timestampDeltaWidth=" + timestampDeltaWidth
      + ", minMvccVersion=" + minMvccVersion
      + ", mvccVersionDeltaWidth=" + mvccVersionDeltaWidth
      + ", allSameType=" + allSameType
      + ", allTypes=" + allTypes
      + ", numUniqueRows=" + numUniqueRows
      + ", numUniqueFamilies=" + numUniqueFamilies
      + ", numUniqueQualifiers=" + numUniqueQualifiers
      + "]";
}
  /************** absolute getters *******************/

  /**
   * Absolute offset of the block meta section: the backing array's offset plus this
   * block's offset within the buffer.
   */
  public int getAbsoluteMetaOffset() {
    return arrayOffset + bufferOffset;
  }

  /** Row section starts directly after the meta section. */
  public int getAbsoluteRowOffset() {
    return getAbsoluteMetaOffset() + numMetaBytes;
  }

  /** Family section starts directly after the row section. */
  public int getAbsoluteFamilyOffset() {
    return getAbsoluteRowOffset() + numRowBytes;
  }

  /** Qualifier section starts directly after the family section. */
  public int getAbsoluteQualifierOffset() {
    return getAbsoluteFamilyOffset() + numFamilyBytes;
  }

  /** Timestamp section starts directly after the qualifier section. */
  public int getAbsoluteTimestampOffset() {
    return getAbsoluteQualifierOffset() + numQualifierBytes;
  }

  /** Mvcc-version section starts directly after the timestamp section. */
  public int getAbsoluteMvccVersionOffset() {
    return getAbsoluteTimestampOffset() + numTimestampBytes;
  }

  /** Value section starts directly after the mvcc-version section. */
  public int getAbsoluteValueOffset() {
    return getAbsoluteMvccVersionOffset() + numMvccVersionBytes;
  }
  /*************** get/set ***************************/
  // Plain accessors with no validation or side effects. Judging by the names and the
  // absolute-offset getters above, the *Width fields hold per-entry byte widths chosen
  // during encoding and the num*Bytes fields hold section lengths — confirm against the
  // encoder before relying on this.

  public int getTimestampDeltaWidth() {
    return timestampDeltaWidth;
  }

  public void setTimestampDeltaWidth(int timestampDeltaWidth) {
    this.timestampDeltaWidth = timestampDeltaWidth;
  }

  public int getValueOffsetWidth() {
    return valueOffsetWidth;
  }

  // NOTE(review): parameter name differs from the field; consider renaming to valueOffsetWidth.
  public void setValueOffsetWidth(int dataOffsetWidth) {
    this.valueOffsetWidth = dataOffsetWidth;
  }

  public int getValueLengthWidth() {
    return valueLengthWidth;
  }

  // NOTE(review): parameter name differs from the field; consider renaming to valueLengthWidth.
  public void setValueLengthWidth(int dataLengthWidth) {
    this.valueLengthWidth = dataLengthWidth;
  }

  public int getMaxRowLength() {
    return maxRowLength;
  }

  public void setMaxRowLength(int maxRowLength) {
    this.maxRowLength = maxRowLength;
  }

  public long getMinTimestamp() {
    return minTimestamp;
  }

  public void setMinTimestamp(long minTimestamp) {
    this.minTimestamp = minTimestamp;
  }

  public byte getAllTypes() {
    return allTypes;
  }

  public void setAllTypes(byte allTypes) {
    this.allTypes = allTypes;
  }

  // allSameType/allTypes pair: when allSameType is true, allTypes presumably holds the one
  // shared KeyValue type byte — confirm against the encoder.
  public boolean isAllSameType() {
    return allSameType;
  }

  public void setAllSameType(boolean allSameType) {
    this.allSameType = allSameType;
  }

  public int getNextNodeOffsetWidth() {
    return nextNodeOffsetWidth;
  }

  public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) {
    this.nextNodeOffsetWidth = nextNodeOffsetWidth;
  }

  public int getNumRowBytes() {
    return numRowBytes;
  }

  public void setNumRowBytes(int numRowBytes) {
    this.numRowBytes = numRowBytes;
  }

  public int getNumTimestampBytes() {
    return numTimestampBytes;
  }

  public void setNumTimestampBytes(int numTimestampBytes) {
    this.numTimestampBytes = numTimestampBytes;
  }

  public int getNumValueBytes() {
    return numValueBytes;
  }

  public void setNumValueBytes(int numValueBytes) {
    this.numValueBytes = numValueBytes;
  }
  // Plain accessors, continued: buffer positioning (arrayOffset/bufferOffset feed the
  // absolute getters above) and per-section byte counts.

  public int getNumMetaBytes() {
    return numMetaBytes;
  }

  public void setNumMetaBytes(int numMetaBytes) {
    this.numMetaBytes = numMetaBytes;
  }

  public int getArrayOffset() {
    return arrayOffset;
  }

  public void setArrayOffset(int arrayOffset) {
    this.arrayOffset = arrayOffset;
  }

  public int getBufferOffset() {
    return bufferOffset;
  }

  public void setBufferOffset(int bufferOffset) {
    this.bufferOffset = bufferOffset;
  }

  public int getNumKeyValueBytes() {
    return numKeyValueBytes;
  }

  public void setNumKeyValueBytes(int numKeyValueBytes) {
    this.numKeyValueBytes = numKeyValueBytes;
  }

  public int getRowTreeDepth() {
    return rowTreeDepth;
  }

  public void setRowTreeDepth(int rowTreeDepth) {
    this.rowTreeDepth = rowTreeDepth;
  }

  public int getNumMvccVersionBytes() {
    return numMvccVersionBytes;
  }

  public void setNumMvccVersionBytes(int numMvccVersionBytes) {
    this.numMvccVersionBytes = numMvccVersionBytes;
  }

  public int getMvccVersionDeltaWidth() {
    return mvccVersionDeltaWidth;
  }

  public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) {
    this.mvccVersionDeltaWidth = mvccVersionDeltaWidth;
  }

  public long getMinMvccVersion() {
    return minMvccVersion;
  }

  public void setMinMvccVersion(long minMvccVersion) {
    this.minMvccVersion = minMvccVersion;
  }

  public int getNumFamilyBytes() {
    return numFamilyBytes;
  }

  public void setNumFamilyBytes(int numFamilyBytes) {
    this.numFamilyBytes = numFamilyBytes;
  }

  public int getFamilyOffsetWidth() {
    return familyOffsetWidth;
  }

  public void setFamilyOffsetWidth(int familyOffsetWidth) {
    this.familyOffsetWidth = familyOffsetWidth;
  }
  // Plain accessors, continued: uniqueness counts, index widths, format version, and the
  // includesMvccVersion flag (also reported by toString() above).

  public int getNumUniqueRows() {
    return numUniqueRows;
  }

  public void setNumUniqueRows(int numUniqueRows) {
    this.numUniqueRows = numUniqueRows;
  }

  public int getNumUniqueFamilies() {
    return numUniqueFamilies;
  }

  public void setNumUniqueFamilies(int numUniqueFamilies) {
    this.numUniqueFamilies = numUniqueFamilies;
  }

  public int getNumUniqueQualifiers() {
    return numUniqueQualifiers;
  }

  public void setNumUniqueQualifiers(int numUniqueQualifiers) {
    this.numUniqueQualifiers = numUniqueQualifiers;
  }

  public int getNumQualifierBytes() {
    return numQualifierBytes;
  }

  public void setNumQualifierBytes(int numQualifierBytes) {
    this.numQualifierBytes = numQualifierBytes;
  }

  public int getQualifierOffsetWidth() {
    return qualifierOffsetWidth;
  }

  public void setQualifierOffsetWidth(int qualifierOffsetWidth) {
    this.qualifierOffsetWidth = qualifierOffsetWidth;
  }

  public int getMaxQualifierLength() {
    return maxQualifierLength;
  }

  public void setMaxQualifierLength(int maxQualifierLength) {
    this.maxQualifierLength = maxQualifierLength;
  }

  public int getTimestampIndexWidth() {
    return timestampIndexWidth;
  }

  public void setTimestampIndexWidth(int timestampIndexWidth) {
    this.timestampIndexWidth = timestampIndexWidth;
  }

  public int getMvccVersionIndexWidth() {
    return mvccVersionIndexWidth;
  }

  public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) {
    this.mvccVersionIndexWidth = mvccVersionIndexWidth;
  }

  public int getVersion() {
    return version;
  }

  public void setVersion(int version) {
    this.version = version;
  }

  public boolean isIncludesMvccVersion() {
    return includesMvccVersion;
  }

  public void setIncludesMvccVersion(boolean includesMvccVersion) {
    this.includesMvccVersion = includesMvccVersion;
  }
}

View File

@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KeyComparator;
import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator;
import org.apache.hadoop.hbase.KeyValue.RootKeyComparator;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.io.RawComparator;
import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hbase.codec.prefixtree.encode.EncoderFactory;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
/**
* This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or
* package changes.
* <p/>
* PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point
* for PrefixTree encoding and decoding. Encoding is delegated to instances of
* {@link PrefixTreeEncoder}, and decoding is delegated to instances of
* {@link org.apache.hbase.codec.prefixtree.scanner.CellSearcher}. Encoder and decoder instances are
* created and recycled by static PtEncoderFactory and PtDecoderFactory.
*/
@InterfaceAudience.Private
public class PrefixTreeCodec implements DataBlockEncoder {

  /**
   * No-arg constructor required because this class is instantiated via reflection from
   * the DataBlockEncoding enum.
   */
  public PrefixTreeCodec() {
  }

  /**
   * Encodes all KeyValues in {@code in} into the context's encoder stream.
   * <p/>
   * Copied from BufferedDataBlockEncoder. Almost definitely can be improved, but i'm not
   * familiar enough with the concept of the HFileBlockEncodingContext.
   *
   * @param in flat buffer of KeyValues in the unencoded (v1) format
   * @param includesMvccVersion whether each KeyValue is followed by a vlong mvcc version
   * @param blkEncodingCtx must be an HFileBlockDefaultEncodingContext
   * @throws IOException if the context is of the wrong class or encoding fails
   */
  @Override
  public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion,
      HFileBlockEncodingContext blkEncodingCtx) throws IOException {
    // exact class check (not instanceof) mirrors BufferedDataBlockEncoder
    if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
      throw new IOException(this.getClass().getName() + " only accepts "
          + HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context.");
    }

    HFileBlockDefaultEncodingContext encodingCtx
        = (HFileBlockDefaultEncodingContext) blkEncodingCtx;
    encodingCtx.prepareEncoding();
    DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder();
    internalEncodeKeyValues(dataOut, in, includesMvccVersion);

    //do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE?
    if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
      encodingCtx.postEncoding(BlockType.ENCODED_DATA);
    } else {
      encodingCtx.postEncoding(BlockType.DATA);
    }
  }

  /**
   * Iterates the raw KeyValues as shallow copies and feeds each one to a pooled
   * PrefixTreeEncoder; the encoder is always returned to the pool, even on failure.
   */
  private void internalEncodeKeyValues(DataOutputStream encodedOutputStream,
      ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException {
    rawKeyValues.rewind();
    PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion);

    try {
      KeyValue kv;
      // nextShallowCopy returns null when the buffer is exhausted
      while ((kv = KeyValueTool.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) {
        builder.write(kv);
      }
      builder.flush();
    } finally {
      EncoderFactory.checkIn(builder);
    }
  }

  /** Convenience overload: no header allocation, no trailing bytes to skip. */
  @Override
  public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion)
      throws IOException {
    return decodeKeyValues(source, 0, 0, includesMvccVersion);
  }

  /**
   * Fully decodes the prefix-tree block back into a flat (v1-format) KeyValue buffer.
   * <p/>
   * I don't think this method is called during normal HBase operation, so efficiency is not
   * important.
   * <p/>
   * NOTE(review): the skipLastBytes parameter is not used in this implementation — confirm
   * whether callers rely on it.
   */
  @Override
  public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
      int skipLastBytes, boolean includesMvccVersion) throws IOException {
    ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste
    sourceAsBuffer.mark();
    PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer);
    sourceAsBuffer.rewind();
    // size the output for the optional header plus the original unencoded bytes
    int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes();
    byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader];
    ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader);
    result.rewind();
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion);
      while (searcher.next()) {
        // deep-copy each cell out of the trie into a standalone KeyValue
        KeyValue currentCell = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
        // needs to be modified for DirectByteBuffers. no existing methods to
        // write VLongs to byte[]
        int offset = result.arrayOffset() + result.position();
        KeyValueTool.appendToByteArray(currentCell, result.array(), offset);
        int keyValueLength = KeyValueTool.length(currentCell);
        ByteBufferUtils.skip(result, keyValueLength);
        // NOTE(review): offset is never read after this increment; it is recomputed at the
        // top of each iteration. Looks like a dead statement — candidate for removal.
        offset += keyValueLength;
        if (includesMvccVersion) {
          ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion());
        }
      }
      result.position(result.limit());//make it appear as if we were appending
      return result;

    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  /**
   * Returns a copy of the first cell's key in the block, or null if the block is empty.
   * The searcher is checked out of and returned to the pool around the lookup.
   */
  @Override
  public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
    block.rewind();
    PrefixTreeArraySearcher searcher = null;
    try {
      //should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will
      searcher = DecoderFactory.checkOut(block, true);
      if (!searcher.positionAtFirstCell()) {
        return null;
      }
      return KeyValueTool.copyKeyToNewByteBuffer(searcher.getCurrent());
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  /**
   * Creates an encoding context; only DataBlockEncoding.PREFIX_TREE is accepted.
   */
  @Override
  public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm,
      DataBlockEncoding encoding, byte[] header) {
    if (DataBlockEncoding.PREFIX_TREE != encoding) {
      //i'm not sure why encoding is in the interface. Each encoder implementation should probably
      //know it's encoding type
      throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported");
    }
    return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header);
  }

  /** Creates a default decoding context for the given compression algorithm. */
  @Override
  public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) {
    return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
  }

  /**
   * Creates a seeker over prefix-tree encoded blocks. Rejects META/ROOT comparators, which
   * this encoding does not support.
   * <p/>
   * Is this the correct handling of an illegal comparator? How to prevent that from getting all
   * the way to this point.
   *
   * @throws IllegalArgumentException if the comparator is not a plain KeyValue.KeyComparator
   */
  @Override
  public EncodedSeeker createSeeker(RawComparator<byte[]> comparator, boolean includesMvccVersion) {
    if (!(comparator instanceof KeyComparator)) {
      throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator");
    }
    if (comparator instanceof MetaKeyComparator) {
      throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with META "
          + "table");
    }
    if (comparator instanceof RootKeyComparator) {
      throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with ROOT "
          + "table");
    }

    return new PrefixTreeSeeker(includesMvccVersion);
  }
}

View File

@ -0,0 +1,216 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.cell.CellTool;
import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
/**
* These methods have the same definition as any implementation of the EncodedSeeker.
*
* In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It
* currently returns a new KeyValue object each time getKeyValue is called. This is not horrible,
* but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in
* the data from the PrefixTreeCell. It is somewhat heavyweight right now.
*/
@InterfaceAudience.Private
public class PrefixTreeSeeker implements EncodedSeeker {

  // the full encoded block currently being scanned
  protected ByteBuffer block;
  protected boolean includeMvccVersion;
  // pooled searcher; checked out in setCurrentBuffer, returned via releaseCurrentSearcher
  protected PrefixTreeArraySearcher ptSearcher;

  public PrefixTreeSeeker(boolean includeMvccVersion) {
    this.includeMvccVersion = includeMvccVersion;
  }

  /**
   * Attaches this seeker to a new block: checks a searcher out of the pool for the buffer
   * and positions it at the first cell.
   */
  @Override
  public void setCurrentBuffer(ByteBuffer fullBlockBuffer) {
    block = fullBlockBuffer;
    ptSearcher = DecoderFactory.checkOut(block, includeMvccVersion);
    rewind();
  }

  /**
   * Currently unused.
   * <p/>
   * TODO performance leak. should reuse the searchers. hbase does not currently have a hook where
   * this can be called
   */
  public void releaseCurrentSearcher() {
    DecoderFactory.checkIn(ptSearcher);
  }

  /** Deep copy of the current cell's key into a new buffer. */
  @Override
  public ByteBuffer getKeyDeepCopy() {
    return KeyValueTool.copyKeyToNewByteBuffer(ptSearcher.getCurrent());
  }

  /** Shallow (no-copy) view of the current cell's value. */
  @Override
  public ByteBuffer getValueShallowCopy() {
    return CellTool.getValueBufferShallowCopy(ptSearcher.getCurrent());
  }

  /**
   * currently must do deep copy into new array
   */
  @Override
  public ByteBuffer getKeyValueBuffer() {
    return KeyValueTool.copyToNewByteBuffer(ptSearcher.getCurrent());
  }

  /**
   * currently must do deep copy into new array
   */
  @Override
  public KeyValue getKeyValue() {
    return KeyValueTool.copyToNewKeyValue(ptSearcher.getCurrent());
  }

  /**
   * Currently unused.
   * <p/>
   * A nice, lightweight reference, though the underlying cell is transient. This method may return
   * the same reference to the backing PrefixTreeCell repeatedly, while other implementations may
   * return a different reference for each Cell.
   * <p/>
   * The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to use
   * this method instead of the getKeyValue() methods above.
   */
  // @Override
  public Cell getCurrent() {
    return ptSearcher.getCurrent();
  }

  /** Repositions the searcher at the first cell of the block. */
  @Override
  public void rewind() {
    ptSearcher.positionAtFirstCell();
  }

  /** Advances to the next cell; false when the block is exhausted. */
  @Override
  public boolean next() {
    return ptSearcher.next();
  }

  // @Override
  public boolean advance() {
    return ptSearcher.next();
  }

  // selects which of the two seek strategies below seekToKeyInBlock delegates to
  private static final boolean USE_POSITION_BEFORE = false;

  /**
   * Seek forward only (should be called reseekToKeyInBlock?).
   * <p/>
   * If the exact key is found look at the seekBefore variable and:<br/>
   * - if true: go to the previous key if it's true<br/>
   * - if false: stay on the exact key
   * <p/>
   * If the exact key is not found, then go to the previous key *if possible*, but remember to leave
   * the scanner in a valid state if possible.
   * <p/>
   * @param keyOnlyBytes KeyValue format of a Cell's key at which to position the seeker
   * @param offset offset into the keyOnlyBytes array
   * @param length number of bytes of the keyOnlyBytes array to use
   * @param forceBeforeOnExactMatch if an exact match is found and seekBefore=true, back up one Cell
   * @return 0 if the seeker is on the exact key<br/>
   *         1 if the seeker is not on the key for any reason, including seekBefore being true
   */
  @Override
  public int seekToKeyInBlock(byte[] keyOnlyBytes, int offset, int length,
      boolean forceBeforeOnExactMatch) {
    if (USE_POSITION_BEFORE) {
      return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length,
          forceBeforeOnExactMatch);
    } else {
      return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length,
          forceBeforeOnExactMatch);
    }
  }

  /*
   * Support both of these options since the underlying PrefixTree supports both. Possibly
   * expand the EncodedSeeker to utilize them both.
   */
  protected int seekToOrBeforeUsingPositionAtOrBefore(byte[] keyOnlyBytes, int offset, int length,
      boolean forceBeforeOnExactMatch) {
    // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
    KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);

    CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv);

    if (CellScannerPosition.AT == position) {
      if (forceBeforeOnExactMatch) {
        // exact hit but caller asked to land before it
        ptSearcher.previous();
        return 1;
      }
      return 0;
    }

    // seekForwardToOrBefore already left us on the nearest preceding cell
    return 1;
  }

  protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length,
      boolean forceBeforeOnExactMatch) {
    // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
    KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);

    //should probably switch this to use the seekForwardToOrBefore method
    CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv);

    if (CellScannerPosition.AT == position) {
      if (forceBeforeOnExactMatch) {
        ptSearcher.previous();
        return 1;
      }
      return 0;
    }

    if (CellScannerPosition.AFTER == position) {
      // landed after the key; back up one cell unless that would fall off the front
      if (!ptSearcher.isBeforeFirst()) {
        ptSearcher.previous();
      }
      return 1;
    }

    if (position == CellScannerPosition.AFTER_LAST) {
      return 1;
    }

    throw new RuntimeException("unexpected CellScannerPosition:" + position);
  }
}

View File

@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode;
import java.nio.ByteBuffer;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.classification.InterfaceAudience;
/**
* Pools PrefixTreeArraySearcher objects. Each Searcher can consist of hundreds or thousands of
* objects and 1 is needed for each HFile during a Get operation. With tens of thousands of
* Gets/second, reusing these searchers may save a lot of young gen collections.
* <p/>
* Alternative implementation would be a ByteBufferSearcherPool (not implemented yet).
*/
@InterfaceAudience.Private
public class ArraySearcherPool {

  /**
   * One decoder is needed for each storefile for each Get operation so we may need hundreds at the
   * same time, however, decoding is a CPU bound activity so should limit this to something in the
   * realm of maximum reasonable active threads.
   */
  private static final Integer MAX_POOL_SIZE = 1000;

  protected Queue<PrefixTreeArraySearcher> pool
      = new LinkedBlockingQueue<PrefixTreeArraySearcher>(MAX_POOL_SIZE);

  /**
   * Obtains a searcher initialized on the given block, reusing a pooled instance when one
   * is available.
   */
  public PrefixTreeArraySearcher checkOut(ByteBuffer buffer, boolean includesMvccVersion) {
    // poll() yields null on an empty pool; the factory then builds a fresh searcher,
    // otherwise it re-initializes (or, if too small, replaces) the recycled one
    PrefixTreeArraySearcher recycled = pool.poll();
    return DecoderFactory.ensureArraySearcherValid(buffer, recycled, includesMvccVersion);
  }

  /** Releases the searcher's block reference and returns it to the pool. */
  public void checkIn(PrefixTreeArraySearcher searcher) {
    searcher.releaseBlockReference();
    pool.offer(searcher); // silently dropped (left for GC) when the pool is already full
  }

  @Override
  public String toString() {
    return "poolSize:" + pool.size();
  }
}

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
/**
* Static wrapper class for the ArraySearcherPool.
*/
@InterfaceAudience.Private
public class DecoderFactory {

  private static final ArraySearcherPool POOL = new ArraySearcherPool();

  //TODO will need a PrefixTreeSearcher on top of CellSearcher
  /**
   * Obtains a searcher positioned on the given heap-backed block buffer.
   * @throws IllegalArgumentException for direct ByteBuffers (not supported yet)
   */
  public static PrefixTreeArraySearcher checkOut(final ByteBuffer buffer,
      boolean includeMvccVersion) {
    if (buffer.isDirect()) {
      // TODO implement PtByteBufferBlockScanner
      throw new IllegalArgumentException("DirectByteBuffers not supported yet");
    }
    return POOL.checkOut(buffer, includeMvccVersion);
  }

  /**
   * Returns a searcher to the pool. Null is tolerated so callers can check in from a
   * finally block regardless of whether checkOut succeeded.
   */
  public static void checkIn(CellSearcher pSearcher) {
    if (pSearcher == null) {
      return;
    }
    if (pSearcher instanceof PrefixTreeArraySearcher) {
      POOL.checkIn((PrefixTreeArraySearcher) pSearcher);
      return;
    }
    throw new IllegalArgumentException("Cannot return " + pSearcher.getClass() + " to "
        + DecoderFactory.class);
  }

  /**************************** helper ******************************/
  /**
   * Ensures the (possibly null, possibly recycled) searcher is initialized on the given
   * block, replacing it with a bigger instance when its reusable buffers are too small.
   */
  public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer,
      PrefixTreeArraySearcher searcher, boolean includeMvccVersion) {
    if (searcher == null) {
      // nothing recycled: size a brand new searcher from this block's meta
      PrefixTreeBlockMeta freshMeta = new PrefixTreeBlockMeta(buffer);
      PrefixTreeArraySearcher fresh = new PrefixTreeArraySearcher(freshMeta,
          freshMeta.getRowTreeDepth(), freshMeta.getMaxRowLength(),
          freshMeta.getMaxQualifierLength());
      fresh.initOnBlock(freshMeta, buffer.array(), includeMvccVersion);
      return fresh;
    }

    // recycled searcher: refresh its meta from the new block first
    PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta();
    blockMeta.initOnBlock(buffer);
    if (!searcher.areBuffersBigEnough()) {
      // grow to the max of old and new requirements so the replacement keeps fitting both
      int maxRowTreeStackNodes
          = Math.max(blockMeta.getRowTreeDepth(), searcher.getMaxRowTreeStackNodes());
      int rowBufferLength
          = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength());
      int qualifierBufferLength
          = Math.max(blockMeta.getMaxQualifierLength(), searcher.getQualifierBufferLength());
      searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength,
          qualifierBufferLength);
    }
    //this is where we parse the BlockMeta
    searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
    return searcher;
  }
}

View File

@ -0,0 +1,144 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.scanner.ReversibleCellScanner;
/**
* Methods for going backwards through a PrefixTree block. This class is split out on its own to
* simplify the Scanner superclass and Searcher subclass.
*/
@InterfaceAudience.Private
public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements
    ReversibleCellScanner {

  /***************** construct ******************************/

  public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
      int rowBufferLength, int qualifierBufferLength) {
    super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
  }

  /***************** Object methods ***************************/

  @Override
  public boolean equals(Object obj) {
    //trivial override to confirm intent (findbugs)
    return super.equals(obj);
  }

  /***************** methods **********************************/

  /**
   * Steps one cell backwards.
   * @return false if already positioned before the first cell (scanner stays beforeFirst)
   */
  @Override
  public boolean previous() {
    if (afterLast) {
      // stepping back from past-the-end lands on the very last cell
      afterLast = false;
      positionAtLastCell();
      return true;
    }
    if (beforeFirst) {
      return false;
    }

    if (isFirstCellInRow()) {
      // cross the row boundary: back up to the previous row's last cell
      previousRowInternal();
      if (beforeFirst) {
        return false;
      }
      populateLastNonRowFields();
      return true;
    }

    populatePreviousNonRowFields();
    return true;
  }

  /**
   * Moves to the previous row and positions within it.
   * @param endOfRow when true land on the row's last cell, otherwise its first cell
   * @return false when there is no previous row (scanner is beforeFirst)
   */
  @Override
  public boolean previousRow(boolean endOfRow) {
    previousRowInternal();
    if (beforeFirst) {
      return false;
    }
    if (endOfRow) {
      populateLastNonRowFields();
    } else {
      populateFirstNonRowFields();
    }
    return true;
  }

  /**
   * Walks the row-node stack backwards until a row node with occurrences is found.
   * @return true if positioned on a previous row; false when it ran past the beginning
   */
  private boolean previousRowInternal() {
    if (beforeFirst) {
      return false;
    }
    if (afterLast) {
      positionAtLastRow();
      return true;
    }

    if (currentRowNode.hasOccurrences()) {
      // pop the row we are currently on before searching backwards
      discardCurrentRowNode(false);
      if (currentRowNode == null) {
        return false;
      }
    }

    while (!beforeFirst) {
      if (isDirectlyAfterNub()) {//we are about to back up to the nub
        currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf
        nubCellsRemain = true;//this positions us on the nub
        return true;
      }

      if (currentRowNode.hasPreviousFanNodes()) {
        // descend into the previous sibling subtree and go all the way to its last row
        followPreviousFan();
        descendToLastRowFromCurrentPosition();
      } else {// keep going up the stack until we find previous fan positions
        discardCurrentRowNode(false);
        if (currentRowNode == null) {
          return false;
        }
      }

      if (currentRowNode.hasOccurrences()) {// escape clause
        return true;// found some values
      }
    }
    return false;// went past the beginning
  }

  // true when positioned on a nub's first leaf, i.e. the next backward step is the nub itself
  protected boolean isDirectlyAfterNub() {
    return currentRowNode.isNub() && currentRowNode.getFanIndex() == 0;
  }

  protected void positionAtLastRow() {
    reInitFirstNode();
    descendToLastRowFromCurrentPosition();
  }

  // repeatedly follow the last fan until reaching a node with no children
  protected void descendToLastRowFromCurrentPosition() {
    while (currentRowNode.hasChildren()) {
      followLastFan();
    }
  }

  protected void positionAtLastCell() {
    positionAtLastRow();
    populateLastNonRowFields();
  }
}

View File

@ -0,0 +1,506 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hbase.codec.prefixtree.decode.row.RowNodeReader;
import org.apache.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder;
import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hbase.codec.prefixtree.scanner.CellScanner;
/**
* Extends PtCell and manipulates its protected fields. Could alternatively contain a PtCell and
* call get/set methods.
*
* This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This
* implementation requires that the bytes be in a normal java byte[] for performance. The
* alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer
* without copying the whole buffer on-heap.
*/
@InterfaceAudience.Private
public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner {
  /***************** fields ********************************/

  // parsed meta of the block currently being scanned; assigned in initOnBlock
  protected PrefixTreeBlockMeta blockMeta;

  // position flags: set/cleared by resetToBeforeFirstEntry and the traversal methods
  protected boolean beforeFirst;
  protected boolean afterLast;

  // reusable stack of row-trie nodes, sized in the constructor from rowTreeDepth;
  // rowNodeStackIndex is -1 when no node is current (see resetToBeforeFirstEntry)
  protected RowNodeReader[] rowNodes;
  protected int rowNodeStackIndex;

  protected RowNodeReader currentRowNode;
  protected ColumnReader familyReader;
  protected ColumnReader qualifierReader;
  protected TimestampDecoder timestampDecoder;
  protected MvccVersionDecoder mvccVersionDecoder;

  // true while positioned on a nub row that still has cells to emit — TODO confirm
  protected boolean nubCellsRemain;

  // index of the current cell within the current row node; -1 before the first cell
  protected int currentCellIndex;
/*********************** construct ******************************/
  // pass in blockMeta so we can initialize buffers big enough for all cells in the block
  // NOTE(review): the blockMeta parameter is not stored here; initOnBlock() assigns it later,
  // only the three size arguments are used.
  public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
      int rowBufferLength, int qualifierBufferLength) {
    this.rowNodes = new RowNodeReader[rowTreeDepth];
    for (int i = 0; i < rowNodes.length; ++i) {
      rowNodes[i] = new RowNodeReader();
    }
    // rowBuffer/familyBuffer/qualifierBuffer are not declared in this class — presumably
    // inherited from PrefixTreeCell; confirm against the superclass.
    this.rowBuffer = new byte[rowBufferLength];
    this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH];
    this.familyReader = new ColumnReader(familyBuffer, true);
    this.qualifierBuffer = new byte[qualifierBufferLength];
    this.qualifierReader = new ColumnReader(qualifierBuffer, false);
    this.timestampDecoder = new TimestampDecoder();
    this.mvccVersionDecoder = new MvccVersionDecoder();
  }
/**************** init helpers ***************************************/
/**
* Call when first accessing a block.
* @return entirely new scanner if false
*/
public boolean areBuffersBigEnough() {
if (rowNodes.length < blockMeta.getRowTreeDepth()) {
return false;
}
if (rowBuffer.length < blockMeta.getMaxRowLength()) {
return false;
}
if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) {
return false;
}
return true;
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, boolean includeMvccVersion) {
this.block = block;
this.blockMeta = blockMeta;
this.familyOffset = familyBuffer.length;
this.familyReader.initOnBlock(blockMeta, block);
this.qualifierOffset = qualifierBuffer.length;
this.qualifierReader.initOnBlock(blockMeta, block);
this.timestampDecoder.initOnBlock(blockMeta, block);
this.mvccVersionDecoder.initOnBlock(blockMeta, block);
this.includeMvccVersion = includeMvccVersion;
resetToBeforeFirstEntry();
}
@Override
public void resetToBeforeFirstEntry() {
beforeFirst = true;
afterLast = false;
rowNodeStackIndex = -1;
currentRowNode = null;
rowLength = 0;
familyOffset = familyBuffer.length;
familyLength = 0;
qualifierOffset = blockMeta.getMaxQualifierLength();
qualifierLength = 0;
nubCellsRemain = false;
currentCellIndex = -1;
timestamp = -1L;
type = DEFAULT_TYPE;
absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized
valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length
}
/**
* Call this before putting the scanner back into a pool so it doesn't hold the last used block
* in memory.
*/
public void releaseBlockReference(){
block = null;
}
/********************** CellScanner **********************/
@Override
public PrefixTreeCell getCurrent() {
if(isOutOfBounds()){
return null;
}
return this;
}
/******************* Object methods ************************/
@Override
public boolean equals(Object obj) {
//trivial override to confirm intent (findbugs)
return super.equals(obj);
}
@Override
public int hashCode() {
return super.hashCode();
}
/**
* Override PrefixTreeCell.toString() with a check to see if the current cell is valid.
*/
@Override
public String toString() {
PrefixTreeCell currentCell = getCurrent();
if(currentCell==null){
return "null";
}
return currentCell.getKeyValueString();
}
/******************* advance ***************************/
public boolean positionAtFirstCell() {
reInitFirstNode();
return next();
}
@Override
public boolean next() {
if (afterLast) {
return false;
}
if (!hasOccurrences()) {
resetToBeforeFirstEntry();
}
if (beforeFirst || isLastCellInRow()) {
nextRow();
if (afterLast) {
return false;
}
} else {
++currentCellIndex;
}
populateNonRowFields(currentCellIndex);
return true;
}
public boolean nextRow() {
nextRowInternal();
if (afterLast) {
return false;
}
populateNonRowFields(currentCellIndex);
return true;
}
/**
* This method is safe to call when the scanner is not on a fully valid row node, as in the case
* of a row token miss in the Searcher
* @return true if we are positioned on a valid row, false if past end of block
*/
protected boolean nextRowInternal() {
if (afterLast) {
return false;
}
if (beforeFirst) {
initFirstNode();
if (currentRowNode.hasOccurrences()) {
if (currentRowNode.isNub()) {
nubCellsRemain = true;
}
currentCellIndex = 0;
return true;
}
}
if (currentRowNode.isLeaf()) {
discardCurrentRowNode(true);
}
while (!afterLast) {
if (nubCellsRemain) {
nubCellsRemain = false;
}
if (currentRowNode.hasMoreFanNodes()) {
followNextFan();
if (currentRowNode.hasOccurrences()) {
currentCellIndex = 0;
return true;
}// found some values
} else {
discardCurrentRowNode(true);
}
}
return false;// went past the end
}
/**************** secondary traversal methods ******************************/
protected void reInitFirstNode() {
resetToBeforeFirstEntry();
initFirstNode();
}
protected void initFirstNode() {
int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset();
rowNodeStackIndex = 0;
currentRowNode = rowNodes[0];
currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure);
appendCurrentTokenToRowBuffer();
beforeFirst = false;
}
protected void followFirstFan() {
followFan(0);
}
protected void followPreviousFan() {
int nextFanPosition = currentRowNode.getFanIndex() - 1;
followFan(nextFanPosition);
}
protected void followCurrentFan() {
int currentFanPosition = currentRowNode.getFanIndex();
followFan(currentFanPosition);
}
protected void followNextFan() {
int nextFanPosition = currentRowNode.getFanIndex() + 1;
followFan(nextFanPosition);
}
protected void followLastFan() {
followFan(currentRowNode.getLastFanIndex());
}
protected void followFan(int fanIndex) {
currentRowNode.setFanIndex(fanIndex);
appendToRowBuffer(currentRowNode.getFanByte(fanIndex));
int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset()
+ currentRowNode.getNextNodeOffset(fanIndex, blockMeta);
++rowNodeStackIndex;
currentRowNode = rowNodes[rowNodeStackIndex];
currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure);
//TODO getToken is spewing garbage
appendCurrentTokenToRowBuffer();
if (currentRowNode.isNub()) {
nubCellsRemain = true;
}
currentCellIndex = 0;
}
/**
* @param forwards which marker to set if we overflow
*/
protected void discardCurrentRowNode(boolean forwards) {
RowNodeReader rowNodeBeingPopped = currentRowNode;
--rowNodeStackIndex;// pop it off the stack
if (rowNodeStackIndex < 0) {
currentRowNode = null;
if (forwards) {
markAfterLast();
} else {
markBeforeFirst();
}
return;
}
popFromRowBuffer(rowNodeBeingPopped);
currentRowNode = rowNodes[rowNodeStackIndex];
}
protected void markBeforeFirst() {
beforeFirst = true;
afterLast = false;
currentRowNode = null;
}
protected void markAfterLast() {
beforeFirst = false;
afterLast = true;
currentRowNode = null;
}
/***************** helper methods **************************/
protected void appendCurrentTokenToRowBuffer() {
System.arraycopy(block, currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength,
currentRowNode.getTokenLength());
rowLength += currentRowNode.getTokenLength();
}
protected void appendToRowBuffer(byte b) {
rowBuffer[rowLength] = b;
++rowLength;
}
protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) {
rowLength -= rowNodeBeingPopped.getTokenLength();
--rowLength; // pop the parent's fan byte
}
protected boolean hasOccurrences() {
return currentRowNode != null && currentRowNode.hasOccurrences();
}
protected boolean isBranch() {
return currentRowNode != null && !currentRowNode.hasOccurrences()
&& currentRowNode.hasChildren();
}
protected boolean isNub() {
return currentRowNode != null && currentRowNode.hasOccurrences()
&& currentRowNode.hasChildren();
}
protected boolean isLeaf() {
return currentRowNode != null && currentRowNode.hasOccurrences()
&& !currentRowNode.hasChildren();
}
//TODO expose this in a PrefixTreeScanner interface
public boolean isBeforeFirst(){
return beforeFirst;
}
public boolean isAfterLast(){
return afterLast;
}
protected boolean isOutOfBounds(){
return beforeFirst || afterLast;
}
protected boolean isFirstCellInRow() {
return currentCellIndex == 0;
}
protected boolean isLastCellInRow() {
return currentCellIndex == currentRowNode.getLastCellIndex();
}
/********************* fill in family/qualifier/ts/type/value ************/
protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) {
populateNonRowFields(cellNum);
return CellComparator.compareStatic(this, key);
}
protected void populateFirstNonRowFields() {
populateNonRowFields(0);
}
protected void populatePreviousNonRowFields() {
populateNonRowFields(currentCellIndex - 1);
}
protected void populateLastNonRowFields() {
populateNonRowFields(currentRowNode.getLastCellIndex());
}
protected void populateNonRowFields(int cellIndex) {
currentCellIndex = cellIndex;
populateFamily();
populateQualifier();
populateTimestamp();
populateMvccVersion();
populateType();
populateValueOffsets();
}
protected void populateFamily() {
int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta);
familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset();
familyLength = familyReader.getColumnLength();
}
protected void populateQualifier() {
int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta);
qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset();
qualifierLength = qualifierReader.getColumnLength();
}
protected void populateTimestamp() {
if (blockMeta.isAllSameTimestamp()) {
timestamp = blockMeta.getMinTimestamp();
} else {
int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta);
timestamp = timestampDecoder.getLong(timestampIndex);
}
}
protected void populateMvccVersion() {
if (blockMeta.isAllSameMvccVersion()) {
mvccVersion = blockMeta.getMinMvccVersion();
} else {
int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex,
blockMeta);
mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex);
}
}
protected void populateType() {
int typeInt;
if (blockMeta.isAllSameType()) {
typeInt = blockMeta.getAllTypes();
} else {
typeInt = currentRowNode.getType(currentCellIndex, blockMeta);
}
type = PrefixTreeCell.TYPES[typeInt];
}
protected void populateValueOffsets() {
int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta);
absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection;
valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta);
}
/**************** getters ***************************/
public byte[] getTreeBytes() {
return block;
}
public PrefixTreeBlockMeta getBlockMeta() {
return blockMeta;
}
public int getMaxRowTreeStackNodes() {
return rowNodes.length;
}
public int getRowBufferLength() {
return rowBuffer.length;
}
public int getQualifierBufferLength() {
return qualifierBuffer.length;
}
}

View File

@ -0,0 +1,402 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.cell.CellTool;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import com.google.common.primitives.UnsignedBytes;
/**
* Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
* position itself on a requested Cell without scanning through cells before it. The PrefixTree is
* set up to be a Trie of rows, so finding a particular row is extremely cheap.
* <p/>
* Once it finds the row, it does a binary search through the cells inside the row, which is not as
* fast as the trie search, but faster than iterating through every cell like existing block formats
* do. For this reason, this implementation is targeted towards schemas where rows are narrow enough
* to have several or many per block, and where you are generally looking for the entire row or the
* first cell. It will still be fast for wide rows or point queries, but could be improved upon.
*/
@InterfaceAudience.Private
public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
    CellSearcher {

  /*************** construct ******************************/

  public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
      int rowBufferLength, int qualifierBufferLength) {
    super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
  }

  /********************* CellSearcher methods *******************/

  /**
   * Position exactly on the requested cell.
   * @return true only when the exact cell exists in the block
   */
  @Override
  public boolean positionAt(Cell key) {
    return CellScannerPosition.AT == positionAtOrAfter(key);
  }

  /**
   * Position on the requested cell, or on the nearest cell before it on a miss.
   * @return AT on exact match, BEFORE/BEFORE_FIRST on a miss
   */
  @Override
  public CellScannerPosition positionAtOrBefore(Cell key) {
    reInitFirstNode();
    int fanIndex = -1;
    while(true){
      //detect row mismatch. break loop if mismatch
      // snapshot depth before compare: rowLength already includes the current node's token
      int currentNodeDepth = rowLength;
      int rowTokenComparison = compareToCurrentToken(key);
      if(rowTokenComparison != 0){
        return fixRowTokenMissReverse(rowTokenComparison);
      }
      //exact row found, move on to qualifier & ts
      if(rowMatchesAfterCurrentPosition(key)){
        return positionAtQualifierTimestamp(key, true);
      }
      //detect dead end (no fan to descend into)
      if(!currentRowNode.hasFan()){
        if(hasOccurrences()){//must be leaf or nub
          populateLastNonRowFields();
          return CellScannerPosition.BEFORE;
        }else{
          //TODO i don't think this case is exercised by any tests
          return fixRowFanMissReverse(0);
        }
      }
      //keep hunting for the rest of the row
      byte searchForByte = CellTool.getRowByte(key, currentNodeDepth);
      fanIndex = currentRowNode.whichFanNode(searchForByte);
      if(fanIndex < 0){//no matching row. return early
        // NOTE(review): assumes whichFanNode encodes a miss as the negated insertion point
        // (not the -(insertionPoint+1) convention of Arrays.binarySearch) — confirm against
        // RowNodeReader.whichFanNode.
        int insertionPoint = -fanIndex;
        return fixRowFanMissReverse(insertionPoint);
      }
      //found a match, so dig deeper into the tree
      followFan(fanIndex);
    }
  }

  /**
   * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra
   * if-statements. Priority on readability and debugability.
   */
  @Override
  public CellScannerPosition positionAtOrAfter(Cell key) {
    reInitFirstNode();
    int fanIndex = -1;
    while(true){
      //detect row mismatch. break loop if mismatch
      int currentNodeDepth = rowLength;
      int rowTokenComparison = compareToCurrentToken(key);
      if(rowTokenComparison != 0){
        return fixRowTokenMissForward(rowTokenComparison);
      }
      //exact row found, move on to qualifier & ts
      if(rowMatchesAfterCurrentPosition(key)){
        return positionAtQualifierTimestamp(key, false);
      }
      //detect dead end (no fan to descend into)
      if(!currentRowNode.hasFan()){
        if(hasOccurrences()){
          populateFirstNonRowFields();
          return CellScannerPosition.AFTER;
        }else{
          //TODO i don't think this case is exercised by any tests
          return fixRowFanMissForward(0);
        }
      }
      //keep hunting for the rest of the row
      byte searchForByte = CellTool.getRowByte(key, currentNodeDepth);
      fanIndex = currentRowNode.whichFanNode(searchForByte);
      if(fanIndex < 0){//no matching row. return early
        int insertionPoint = -fanIndex;
        return fixRowFanMissForward(insertionPoint);
      }
      //found a match, so dig deeper into the tree
      followFan(fanIndex);
    }
  }

  /**
   * Forward-only variant of positionAt: refuses to move if the key is behind us.
   */
  @Override
  public boolean seekForwardTo(Cell key) {
    if(currentPositionIsAfter(key)){
      //our position is after the requested key, so can't do anything
      return false;
    }
    return positionAt(key);
  }

  @Override
  public CellScannerPosition seekForwardToOrBefore(Cell key) {
    //Do we even need this check or should upper layers avoid this situation. It's relatively
    //expensive compared to the rest of the seek operation.
    if(currentPositionIsAfter(key)){
      //our position is after the requested key, so can't do anything
      return CellScannerPosition.AFTER;
    }
    return positionAtOrBefore(key);
  }

  @Override
  public CellScannerPosition seekForwardToOrAfter(Cell key) {
    //Do we even need this check or should upper layers avoid this situation. It's relatively
    //expensive compared to the rest of the seek operation.
    if(currentPositionIsAfter(key)){
      //our position is after the requested key, so can't do anything
      return CellScannerPosition.AFTER;
    }
    return positionAtOrAfter(key);
  }

  /**
   * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
   */
  @Override
  public void positionAfterLastCell() {
    resetToBeforeFirstEntry();
    beforeFirst = false;
    afterLast = true;
  }

  /***************** Object methods ***************************/

  @Override
  public boolean equals(Object obj) {
    //trivial override to confirm intent (findbugs)
    return super.equals(obj);
  }

  /****************** internal methods ************************/

  /** @return true if the scanner's current cell sorts after the given cell */
  protected boolean currentPositionIsAfter(Cell cell){
    return compareTo(cell) > 0;
  }

  /**
   * Binary-search the cells of the current row node for the key's qualifier/timestamp/type.
   * On a miss, steps to the neighboring cell dictated by beforeOnMiss (crossing row boundaries
   * via next()/previous() when needed).
   * @param key the cell being searched for; its row already matched
   * @param beforeOnMiss true to land before the key on a miss, false to land after
   */
  protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
    int minIndex = 0;
    int maxIndex = currentRowNode.getLastCellIndex();
    int diff;
    while (true) {
      int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
      diff = populateNonRowFieldsAndCompareTo(midIndex, key);
      if (diff == 0) {// found exact match
        return CellScannerPosition.AT;
      } else if (minIndex == maxIndex) {// even termination case
        break;
      } else if ((minIndex + 1) == maxIndex) {// odd termination case
        // two candidates left: try max first, fall back to min if max is past the key
        diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
        if(diff > 0){
          diff = populateNonRowFieldsAndCompareTo(minIndex, key);
        }
        break;
      } else if (diff < 0) {// keep going forward
        minIndex = currentCellIndex;
      } else {// went past it, back up
        maxIndex = currentCellIndex;
      }
    }
    if (diff == 0) {
      return CellScannerPosition.AT;
    } else if (diff < 0) {// we are before key
      if (beforeOnMiss) {
        return CellScannerPosition.BEFORE;
      }
      if (next()) {
        return CellScannerPosition.AFTER;
      }
      return CellScannerPosition.AFTER_LAST;
    } else {// we are after key
      if (!beforeOnMiss) {
        return CellScannerPosition.AFTER;
      }
      if (previous()) {
        return CellScannerPosition.BEFORE;
      }
      return CellScannerPosition.BEFORE_FIRST;
    }
  }

  /**
   * compare this.row to key.row but starting at the current rowLength
   * <p/>
   * The bytes up to rowLength were already verified while descending the trie, so only the
   * lengths and occurrence flag need checking here.
   * @param key Cell being searched for
   * @return true if row buffer contents match key.row
   */
  protected boolean rowMatchesAfterCurrentPosition(Cell key) {
    if (!currentRowNode.hasOccurrences()) {
      return false;
    }
    int thatRowLength = key.getRowLength();
    if (rowLength != thatRowLength) {
      return false;
    }
    return true;
  }

  // TODO move part of this to Cell comparator?
  /**
   * Compare only the bytes within the window of the current token
   * @param key
   * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after
   */
  protected int compareToCurrentToken(Cell key) {
    int startIndex = rowLength - currentRowNode.getTokenLength();
    int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
    for (int i = startIndex; i < endIndexExclusive; ++i) {
      if (i >= key.getRowLength()) {// key was shorter, so it's first
        return -1;
      }
      byte keyByte = CellTool.getRowByte(key, i);
      byte thisByte = rowBuffer[i];
      if (keyByte == thisByte) {
        continue;
      }
      // unsigned comparison, matching row byte ordering
      return UnsignedBytes.compare(keyByte, thisByte);
    }
    return 0;
  }

  /** Descend the right-most path until reaching a node with no children (the block's last row). */
  protected void followLastFansUntilExhausted(){
    while(currentRowNode.hasFan()){
      followLastFan();
    }
  }

  /****************** complete seek when token mismatch ******************/

  /**
   * Finish a reverse-biased seek after the key diverged inside a row token.
   * @param searcherIsAfterInputKey &lt;0: input key is before the searcher's position<br/>
   *          &gt;0: input key is after the searcher's position
   */
  protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
    if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
      boolean foundPreviousRow = previousRow(true);
      if(foundPreviousRow){
        populateLastNonRowFields();
        return CellScannerPosition.BEFORE;
      }else{
        return CellScannerPosition.BEFORE_FIRST;
      }
    }else{//searcher position is before the input key
      if(currentRowNode.hasOccurrences()){
        populateFirstNonRowFields();
        return CellScannerPosition.BEFORE;
      }
      boolean foundNextRow = nextRow();
      if(foundNextRow){
        return CellScannerPosition.AFTER;
      }else{
        return CellScannerPosition.AFTER_LAST;
      }
    }
  }

  /**
   * Finish a forward-biased seek after the key diverged inside a row token.
   * @param searcherIsAfterInputKey &lt;0: input key is before the searcher's position<br/>
   *          &gt;0: input key is after the searcher's position
   */
  protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
    if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
      if(currentRowNode.hasOccurrences()){
        populateFirstNonRowFields();
        return CellScannerPosition.AFTER;
      }
      boolean foundNextRow = nextRow();
      if(foundNextRow){
        return CellScannerPosition.AFTER;
      }else{
        return CellScannerPosition.AFTER_LAST;
      }
    }else{//searcher position is before the input key, so go forward
      discardCurrentRowNode(true);
      boolean foundNextRow = nextRow();
      if(foundNextRow){
        return CellScannerPosition.AFTER;
      }else{
        return CellScannerPosition.AFTER_LAST;
      }
    }
  }

  /****************** complete seek when fan mismatch ******************/

  /**
   * Finish a reverse-biased seek after no fan byte matched the key.
   * @param fanInsertionPoint where the key's byte would slot into the fan; 0 means before all
   */
  protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
    if(fanInsertionPoint == 0){//we need to back up a row
      boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
      if(foundPreviousRow){
        populateLastNonRowFields();
        return CellScannerPosition.BEFORE;
      }
      return CellScannerPosition.BEFORE_FIRST;
    }
    //follow the previous fan, but then descend recursively forward
    followFan(fanInsertionPoint - 1);
    followLastFansUntilExhausted();
    populateLastNonRowFields();
    return CellScannerPosition.BEFORE;
  }

  /**
   * Finish a forward-biased seek after no fan byte matched the key.
   * @param fanInsertionPoint where the key's byte would slot into the fan
   */
  protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
    if(fanInsertionPoint >= currentRowNode.getFanOut()){
      // key sorts after every child of this node; pop up and continue with the next row
      discardCurrentRowNode(true);
      if (!nextRow()) {
        return CellScannerPosition.AFTER_LAST;
      } else {
        return CellScannerPosition.AFTER;
      }
    }
    followFan(fanInsertionPoint);
    if(hasOccurrences()){
      populateFirstNonRowFields();
      return CellScannerPosition.AFTER;
    }
    if(nextRowInternal()){
      populateFirstNonRowFields();
      return CellScannerPosition.AFTER;
    }else{
      return CellScannerPosition.AFTER_LAST;
    }
  }
}

View File

@ -0,0 +1,197 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
/**
* As the PrefixTreeArrayScanner moves through the tree bytes, it changes the values in the fields
* of this class so that Cell logic can be applied, but without allocating new memory for every Cell
* iterated through.
*/
@InterfaceAudience.Private
public class PrefixTreeCell implements Cell, Comparable<Cell> {

  /********************** static **********************/

  /** Lookup table from KeyValue type code (as unsigned byte) to its enum constant. */
  public static final KeyValue.Type[] TYPES = new KeyValue.Type[256];
  static {
    for (KeyValue.Type type : KeyValue.Type.values()) {
      TYPES[type.getCode() & 0xff] = type;
    }
  }

  //Same as KeyValue constructor. Only used to avoid NPE's when full cell hasn't been initialized.
  public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put;

  /******************** fields ************************/

  // the encoded block; values are referenced in place via absoluteValueOffset/valueLength
  protected byte[] block;
  //we could also avoid setting the mvccVersion in the scanner/searcher, but this is simpler
  protected boolean includeMvccVersion;

  // reusable row buffer; the row always starts at offset 0 (see getRowOffset)
  protected byte[] rowBuffer;
  protected int rowLength;

  // reusable family buffer; contents are right-aligned, so offset/length locate them
  protected byte[] familyBuffer;
  protected int familyOffset;
  protected int familyLength;

  protected byte[] qualifierBuffer;// aligned to the end of the array
  protected int qualifierOffset;
  protected int qualifierLength;

  // NOTE(review): boxed Longs; getTimestamp()/getMvccVersion() would NPE if read before the
  // scanner populates them — presumably they are always populated first. Confirm, or use long.
  protected Long timestamp;
  protected Long mvccVersion;

  protected KeyValue.Type type;

  // value position within block (absolute, unlike the relative offsets stored in the trie)
  protected int absoluteValueOffset;
  protected int valueLength;

  /********************** Cell methods ******************/

  /**
   * For debugging. Currently creates new KeyValue to utilize its toString() method.
   */
  @Override
  public String toString() {
    return getKeyValueString();
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof Cell)) {
      return false;
    }
    //Temporary hack to maintain backwards compatibility with KeyValue.equals
    return CellComparator.equalsIgnoreMvccVersion(this, (Cell)obj);

    //TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907
  }

  @Override
  public int hashCode(){
    //Temporary hack to maintain backwards compatibility with KeyValue.hashCode
    //I don't think this is used in any hot code paths
    return KeyValueTool.copyToNewKeyValue(this).hashCode();

    //TODO return CellComparator.hashCode(this);//see HBASE-6907
  }

  @Override
  public int compareTo(Cell other) {
    return CellComparator.compareStatic(this, other);
  }

  @Override
  public long getTimestamp() {
    return timestamp;
  }

  /** @return the decoded mvcc version, or 0 when the block was opened without mvcc versions */
  @Override
  public long getMvccVersion() {
    if (!includeMvccVersion) {
      return 0L;
    }
    return mvccVersion;
  }

  @Override
  public int getValueLength() {
    return valueLength;
  }

  @Override
  public byte[] getRowArray() {
    return rowBuffer;
  }

  @Override
  public int getRowOffset() {
    return 0;
  }

  @Override
  public short getRowLength() {
    return (short) rowLength;
  }

  @Override
  public byte[] getFamilyArray() {
    return familyBuffer;
  }

  @Override
  public int getFamilyOffset() {
    return familyOffset;
  }

  @Override
  public byte getFamilyLength() {
    return (byte) familyLength;
  }

  @Override
  public byte[] getQualifierArray() {
    return qualifierBuffer;
  }

  @Override
  public int getQualifierOffset() {
    return qualifierOffset;
  }

  @Override
  public int getQualifierLength() {
    return qualifierLength;
  }

  /** The value lives inside the encoded block itself; no copy is made. */
  @Override
  public byte[] getValueArray() {
    return block;
  }

  @Override
  public int getValueOffset() {
    return absoluteValueOffset;
  }

  @Override
  public byte getTypeByte() {
    return type.getCode();
  }

  /************************* helper methods *************************/

  /**
   * Need this separate method so we can call it from subclasses' toString() methods
   */
  protected String getKeyValueString(){
    KeyValue kv = KeyValueTool.copyToNewKeyValue(this);
    return kv.toString();
  }
}

View File

@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode.column;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.util.vint.UFIntTool;
import org.apache.hbase.util.vint.UVIntTool;
@InterfaceAudience.Private
public class ColumnNodeReader {

  /**************** fields ************************/

  /** Meta info for the block whose column trie is being read. */
  protected PrefixTreeBlockMeta blockMeta;
  /** The full encoded block bytes. */
  protected byte[] block;

  /** Destination buffer that tokens are prepended into. */
  protected byte[] columnBuffer;
  /** True when reading the family trie, false for the qualifier trie. */
  protected boolean familyVsQualifier;

  /** Decoded state of the node most recently visited via positionAt(..). */
  protected int offsetIntoBlock;
  protected int tokenOffsetIntoBlock;
  protected int tokenLength;
  protected int parentStartPosition;

  /************** construct *************************/

  public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) {
    this.columnBuffer = columnBuffer;
    this.familyVsQualifier = familyVsQualifier;
  }

  /** Attach this reader to a new block; call once per block before positionAt(..). */
  public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
    this.blockMeta = blockMeta;
    this.block = block;
  }

  /************* methods *****************************/

  /**
   * Decode the column node starting at the given absolute block offset: its token length,
   * token position, and the relative offset of its parent node.
   */
  public void positionAt(int offsetIntoBlock) {
    this.offsetIntoBlock = offsetIntoBlock;
    tokenLength = UVIntTool.getInt(block, offsetIntoBlock);
    tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
    // the parent pointer follows the token; its byte width depends on which trie this is
    int offsetWidth = familyVsQualifier
        ? blockMeta.getFamilyOffsetWidth()
        : blockMeta.getQualifierOffsetWidth();
    parentStartPosition = (int) UFIntTool.fromBytes(block, tokenOffsetIntoBlock + tokenLength,
        offsetWidth);
  }

  /** Copy this node's token into columnBuffer starting at bufferStartIndex. */
  public void prependTokenToBuffer(int bufferStartIndex) {
    System.arraycopy(block, tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength);
  }

  /** @return true if the currently positioned node is the root of its trie. */
  public boolean isRoot() {
    int rootOffset = familyVsQualifier
        ? blockMeta.getAbsoluteFamilyOffset()
        : blockMeta.getAbsoluteQualifierOffset();
    return offsetIntoBlock == rootOffset;
  }

  /************** standard methods *********************/

  @Override
  public String toString() {
    return super.toString() + "[" + offsetIntoBlock + "]";
  }

  /****************** get/set ****************************/

  public int getTokenLength() {
    return tokenLength;
  }

  public int getParentStartPosition() {
    return parentStartPosition;
  }
}

View File

@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode.column;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
/**
* Position one of these appropriately in the data block and you can call its methods to retrieve
* the family or qualifier at the current position.
*/
@InterfaceAudience.Private
public class ColumnReader {

  /****************** fields *************************/

  /** Meta info for the block whose column trie is being read. */
  protected PrefixTreeBlockMeta blockMeta;

  /** Reusable buffer; decoded column bytes are right-aligned at columnOffset..+columnLength. */
  protected byte[] columnBuffer;
  protected int columnOffset;
  protected int columnLength;
  /** True when reading the family trie, false for the qualifier trie. */
  protected boolean familyVsQualifier;

  /** Node-level decoder shared across populateBuffer(..) calls. */
  protected ColumnNodeReader columnNodeReader;

  /******************** construct *******************/

  public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) {
    this.columnBuffer = columnBuffer;
    this.familyVsQualifier = familyVsQualifier;
    this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier);
  }

  /** Attach this reader to a new block and clear any previously decoded column. */
  public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
    this.blockMeta = blockMeta;
    clearColumnBuffer();
    columnNodeReader.initOnBlock(blockMeta, block);
  }

  /********************* methods *******************/

  /**
   * Reconstruct a full family/qualifier by walking from the node at the given relative offset
   * up to the trie root, prepending each node's token into columnBuffer.
   * @param offsetIntoColumnData node offset relative to the start of this trie's section
   * @return this, for chaining with getColumnOffset()
   */
  public ColumnReader populateBuffer(int offsetIntoColumnData) {
    clearColumnBuffer();
    // section base is fixed for the life of the block, so resolve it once
    int sectionAbsoluteOffset = familyVsQualifier
        ? blockMeta.getAbsoluteFamilyOffset()
        : blockMeta.getAbsoluteQualifierOffset();
    int relativeOffset = offsetIntoColumnData;
    do {
      columnNodeReader.positionAt(sectionAbsoluteOffset + relativeOffset);
      int tokenLength = columnNodeReader.getTokenLength();
      columnOffset -= tokenLength;
      columnLength += tokenLength;
      columnNodeReader.prependTokenToBuffer(columnOffset);
      relativeOffset = columnNodeReader.getParentStartPosition();
    } while (!columnNodeReader.isRoot());
    return this;
  }

  /** @return a fresh array holding just the decoded column bytes (testing convenience). */
  public byte[] copyBufferToNewArray() {// for testing
    byte[] result = new byte[columnLength];
    System.arraycopy(columnBuffer, columnOffset, result, 0, columnLength);
    return result;
  }

  public int getColumnLength() {
    return columnLength;
  }

  /** Reset the buffer window to empty, anchored at the end of columnBuffer. */
  public void clearColumnBuffer() {
    columnOffset = columnBuffer.length;
    columnLength = 0;
  }

  /****************************** get/set *************************************/

  public int getColumnOffset() {
    return columnOffset;
  }
}

View File

@ -0,0 +1,267 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode.row;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.util.vint.UFIntTool;
import org.apache.hbase.util.vint.UVIntTool;
/**
 * Position one of these appropriately in the data block and you can call its methods to retrieve
 * information necessary to decode the cells in the row.
 */
@InterfaceAudience.Private
public class RowNodeReader {

  /************* fields ***********************************/

  // the full encoded block, and the absolute offset of this row node within it
  protected byte[] block;
  protected int offset;

  // index of the fan position currently being traversed; -1 before traversal begins
  protected int fanIndex;

  // number of cells that terminate at this node
  protected int numCells;

  // the row-key token fragment stored at this node
  protected int tokenOffset;
  protected int tokenLength;

  // the fan: fanOut bytes, one per child node (binary-searched in whichFanNode, so stored sorted)
  protected int fanOffset;
  protected int fanOut;

  // absolute offsets of this node's per-cell parallel sections, pre-computed in initOnBlock()
  protected int familyOffsetsOffset;
  protected int qualifierOffsetsOffset;
  protected int timestampIndexesOffset;
  protected int mvccVersionIndexesOffset;
  protected int operationTypesOffset;
  protected int valueOffsetsOffset;
  protected int valueLengthsOffset;
  protected int nextNodeOffsetsOffset;

  /******************* construct **************************/

  /**
   * Point this reader at the row node starting at {@code offset} within {@code block}, parsing
   * the node's header and pre-computing the start of each per-cell section. Node layout:
   * vint tokenLength, token bytes, vint fanOut, fan bytes, vint numCells, followed by
   * fixed-width per-cell arrays whose widths come from the blockMeta.
   */
  public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) {
    this.block = block;
    this.offset = offset;
    resetFanIndex();

    this.tokenLength = UVIntTool.getInt(block, offset);
    this.tokenOffset = offset + UVIntTool.numBytes(tokenLength);

    this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength);
    this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut);

    this.numCells = UVIntTool.getInt(block, fanOffset + fanOut);

    // each section holds numCells entries of the fixed width recorded in blockMeta
    this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells);
    this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth();
    this.timestampIndexesOffset = qualifierOffsetsOffset + numCells
        * blockMeta.getQualifierOffsetWidth();
    this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells
        * blockMeta.getTimestampIndexWidth();
    this.operationTypesOffset = mvccVersionIndexesOffset + numCells
        * blockMeta.getMvccVersionIndexWidth();
    this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth();
    this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth();
    this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth();
  }

  /******************** methods ****************************/

  /** A leaf has no children (and therefore holds at least one cell). */
  public boolean isLeaf() {
    return fanOut == 0;
  }

  /** A nub has both children and cells of its own. */
  public boolean isNub() {
    return fanOut > 0 && numCells > 0;
  }

  /** A branch has children but no cells of its own. */
  public boolean isBranch() {
    return fanOut > 0 && numCells == 0;
  }

  public boolean hasOccurrences() {
    return numCells > 0;
  }

  public int getTokenArrayOffset(){
    return tokenOffset;
  }

  public int getTokenLength() {
    return tokenLength;
  }

  public byte getFanByte(int i) {
    return block[fanOffset + i];
  }

  /**
   * for debugging
   */
  protected String getFanByteReadable(int i){
    return Bytes.toStringBinary(block, fanOffset + i, 1);
  }

  /** Offset (into the family section) of the family of the index-th cell in this node. */
  public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getFamilyOffsetWidth();
    int startIndex = familyOffsetsOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  /** Offset (into the qualifier section) of the qualifier of the index-th cell in this node. */
  public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getQualifierOffsetWidth();
    int startIndex = qualifierOffsetsOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getTimestampIndexWidth();
    int startIndex = timestampIndexesOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getMvccVersionIndexWidth();
    int startIndex = mvccVersionIndexesOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  /**
   * Type byte of the index-th cell. When every cell in the block shares one type, it is stored
   * once in the blockMeta rather than per cell.
   */
  public int getType(int index, PrefixTreeBlockMeta blockMeta) {
    if (blockMeta.isAllSameType()) {
      return blockMeta.getAllTypes();
    }
    return block[operationTypesOffset + index];
  }

  public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getValueOffsetWidth();
    int startIndex = valueOffsetsOffset + fIntWidth * index;
    int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
    return offset;
  }

  public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getValueLengthWidth();
    int startIndex = valueLengthsOffset + fIntWidth * index;
    int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
    return length;
  }

  /** Offset of the child row node reached via the index-th fan position. */
  public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getNextNodeOffsetWidth();
    int startIndex = nextNodeOffsetsOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  /** Single-letter node-kind label for debugging: Nub, Branch, or Leaf. */
  public String getBranchNubLeafIndicator() {
    if (isNub()) {
      return "N";
    }
    return isBranch() ? "B" : "L";
  }

  public boolean hasChildren() {
    return fanOut > 0;
  }

  public int getLastFanIndex() {
    return fanOut - 1;
  }

  public int getLastCellIndex() {
    return numCells - 1;
  }

  public int getNumCells() {
    return numCells;
  }

  public int getFanOut() {
    return fanOut;
  }

  public byte[] getToken() {
    // TODO pass in reusable ByteRange
    return new ByteRange(block, tokenOffset, tokenLength).deepCopyToNewArray();
  }

  public int getOffset() {
    return offset;
  }

  /**
   * Binary-search the fan for {@code searchForByte}. Returns the fan index when found; when not
   * found, returns a negative value derived from the underlying search result (adjusted from a
   * block-absolute position back to a fan-relative one) — NOTE(review): presumably an
   * insertion-point encoding like Arrays.binarySearch; confirm against
   * Bytes.unsignedBinarySearch before relying on the exact negative value.
   */
  public int whichFanNode(byte searchForByte) {
    if( ! hasFan()){
      throw new IllegalStateException("This row node has no fan, so can't search it");
    }
    int fanIndexInBlock = Bytes.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut,
        searchForByte);
    if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block
      return fanIndexInBlock - fanOffset;
    }
    return fanIndexInBlock + fanOffset + 1;// didn't find it, so compensate in reverse
  }

  public void resetFanIndex() {
    fanIndex = -1;// just the way the logic currently works
  }

  public int getFanIndex() {
    return fanIndex;
  }

  public void setFanIndex(int fanIndex) {
    this.fanIndex = fanIndex;
  }

  public boolean hasFan(){
    return fanOut > 0;
  }

  public boolean hasPreviousFanNodes() {
    return fanOut > 0 && fanIndex > 0;
  }

  public boolean hasMoreFanNodes() {
    return fanIndex < getLastFanIndex();
  }

  public boolean isOnLastFanNode() {
    return !hasMoreFanNodes();
  }

  /*************** standard methods **************************/

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("fan:" + Bytes.toStringBinary(block, fanOffset, fanOut));
    sb.append(",token:" + Bytes.toStringBinary(block, tokenOffset, tokenLength));
    sb.append(",numCells:" + numCells);
    sb.append(",fanIndex:"+fanIndex);
    if(fanIndex>=0){
      sb.append("("+getFanByteReadable(fanIndex)+")");
    }
    return sb.toString();
  }
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode.timestamp;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.util.vint.UFIntTool;
/**
 * Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block.
 */
@InterfaceAudience.Private
public class MvccVersionDecoder {

  protected PrefixTreeBlockMeta blockMeta;
  protected byte[] block;

  /************** construct ***********************/

  public MvccVersionDecoder() {
  }

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
    this.blockMeta = blockMeta;
    this.block = block;
  }

  /************** methods *************************/

  /**
   * Decode the mvccVersion of the cell at the given index by adding the cell's stored delta to
   * the block-wide minimum.
   */
  public long getMvccVersion(int index) {
    if (blockMeta.getMvccVersionIndexWidth() == 0) {
      // every mvccVersion in the block was identical, so only the minimum was stored
      return blockMeta.getMinMvccVersion();
    }
    int deltaWidth = blockMeta.getMvccVersionDeltaWidth();
    int deltaPosition = blockMeta.getAbsoluteMvccVersionOffset() + deltaWidth * index;
    return blockMeta.getMinMvccVersion() + UFIntTool.fromBytes(block, deltaPosition, deltaWidth);
  }
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.decode.timestamp;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.util.vint.UFIntTool;
/**
 * Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block.
 */
@InterfaceAudience.Private
public class TimestampDecoder {

  protected PrefixTreeBlockMeta blockMeta;
  protected byte[] block;

  /************** construct ***********************/

  public TimestampDecoder() {
  }

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
    this.blockMeta = blockMeta;
    this.block = block;
  }

  /************** methods *************************/

  /**
   * Decode the timestamp of the cell at the given index by adding the cell's stored delta to the
   * block-wide minimum.
   */
  public long getLong(int index) {
    if (blockMeta.getTimestampIndexWidth() == 0) {
      // every timestamp in the block was identical, so only the minimum was stored
      return blockMeta.getMinTimestamp();
    }
    int deltaWidth = blockMeta.getTimestampDeltaWidth();
    int deltaPosition = blockMeta.getAbsoluteTimestampOffset() + deltaWidth * index;
    return blockMeta.getMinTimestamp() + UFIntTool.fromBytes(block, deltaPosition, deltaWidth);
  }
}

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * Retrieve PrefixTreeEncoders from this factory which handles pooling them and preparing the
 * ones retrieved from the pool for usage.
 */
@InterfaceAudience.Private
public class EncoderFactory {

  private static final EncoderPool POOL = new ThreadLocalEncoderPool();

  /** Borrow an encoder from the pool, reset and ready to write to {@code outputStream}. */
  public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
    return POOL.checkOut(outputStream, includeMvccVersion);
  }

  /** Return a previously checked-out encoder to the pool for reuse. */
  public static void checkIn(PrefixTreeEncoder encoder) {
    POOL.checkIn(encoder);
  }

  /**************************** helper ******************************/

  /**
   * Reuse the given encoder if non-null, otherwise allocate a fresh one; either way reset it
   * against the new output stream before handing it back.
   */
  protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder,
      OutputStream outputStream, boolean includeMvccVersion) {
    PrefixTreeEncoder prepared = (encoder != null)
        ? encoder
        : new PrefixTreeEncoder(outputStream, includeMvccVersion);
    prepared.reset(outputStream, includeMvccVersion);
    return prepared;
  }
}

View File

@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * Pool of reusable {@link PrefixTreeEncoder} instances. Implementations hand out an encoder
 * prepared for a new output stream and accept it back when the caller is done.
 */
@InterfaceAudience.Private
public interface EncoderPool {

  /** Retrieve an encoder ready to write an encoded block to {@code outputStream}. */
  PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion);

  /** Return a previously checked-out encoder so it can be reused. */
  void checkIn(PrefixTreeEncoder encoder);
}

View File

@ -0,0 +1,494 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellOutputStream;
import org.apache.hbase.cell.CellTool;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.apache.hbase.codec.prefixtree.encode.row.RowSectionWriter;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hbase.util.byterange.ByteRangeSet;
import org.apache.hbase.util.byterange.impl.ByteRangeHashSet;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.apache.hbase.util.vint.UFIntTool;
/**
 * This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are
 * added they are completely copied into the various encoding structures. This is important because
 * usually the cells being fed in during compactions will be transient.<br/>
 * <br/>
 * Usage:<br/>
 * 1) constructor<br/>
 * 4) append cells in sorted order: write(Cell cell)<br/>
 * 5) flush()<br/>
 */
@InterfaceAudience.Private
public class PrefixTreeEncoder implements CellOutputStream {

  /**************** static ************************/

  protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class);

  //future-proof where HBase supports multiple families in a data block.
  //NOTE(review): constant name is misspelled ("MULITPLE"); it is public, so renaming could break
  //external callers.
  public static final boolean MULITPLE_FAMILIES_POSSIBLE = false;

  private static final boolean USE_HASH_COLUMN_SORTER = true;
  private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256;
  private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024;

  /**************** fields *************************/

  // number of times this encoder has been reset for a new block (encoders are pooled)
  protected long numResets = 0L;

  protected OutputStream outputStream;

  /*
   * Cannot change during a single block's encoding. If false, then substitute incoming Cell's
   * mvccVersion with zero and write out the block as usual.
   */
  protected boolean includeMvccVersion;

  /*
   * reusable ByteRanges used for communicating with the sorters/compilers
   */
  protected ByteRange rowRange;
  protected ByteRange familyRange;
  protected ByteRange qualifierRange;

  /*
   * incoming Cell fields are copied into these arrays
   */
  protected long[] timestamps;
  protected long[] mvccVersions;
  protected byte[] typeBytes;
  // valueOffsets[i]..valueOffsets[i+1] delimit cell i's value within the values buffer
  protected int[] valueOffsets;
  protected byte[] values;

  protected PrefixTreeBlockMeta blockMeta;

  /*
   * Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and
   * compile before flushing.
   */
  protected LongEncoder timestampEncoder;
  protected LongEncoder mvccVersionEncoder;
  protected CellTypeEncoder cellTypeEncoder;

  /*
   * Structures used for collecting families and qualifiers, de-duplicating them, and sorting them
   * so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by
   * comparing only with the previous row key, families and qualifiers can arrive in unsorted order
   * in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them.
   */
  protected ByteRangeSet familyDeduplicator;
  protected ByteRangeSet qualifierDeduplicator;

  /*
   * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
   * trie structure with nodes connected by memory pointers (not serializable yet).
   */
  protected Tokenizer rowTokenizer;
  protected Tokenizer familyTokenizer;
  protected Tokenizer qualifierTokenizer;

  /*
   * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
   * all information to an output stream of bytes that can be stored on disk.
   */
  protected RowSectionWriter rowWriter;
  protected ColumnSectionWriter familyWriter;
  protected ColumnSectionWriter qualifierWriter;

  /*
   * Integers used for counting cells and bytes. We keep track of the size of the Cells as if they
   * were full KeyValues because some parts of HBase like to know the "unencoded size".
   */
  protected int totalCells = 0;
  protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
  protected int totalValueBytes = 0;
  protected int maxValueLength = 0;
  protected int totalBytes = 0;//

  /***************** construct ***********************/

  /**
   * Allocate all reusable sub-structures once; subsequent blocks reuse them via reset().
   */
  public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) {
    // used during cell accumulation
    this.blockMeta = new PrefixTreeBlockMeta();
    this.rowRange = new ByteRange();
    this.familyRange = new ByteRange();
    this.qualifierRange = new ByteRange();
    this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES];
    this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES];
    this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES];
    this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES];
    this.values = new byte[VALUE_BUFFER_INIT_SIZE];

    // used during compilation
    this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
        : new ByteRangeTreeSet();
    this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
        : new ByteRangeTreeSet();
    this.timestampEncoder = new LongEncoder();
    this.mvccVersionEncoder = new LongEncoder();
    this.cellTypeEncoder = new CellTypeEncoder();
    this.rowTokenizer = new Tokenizer();
    this.familyTokenizer = new Tokenizer();
    this.qualifierTokenizer = new Tokenizer();
    this.rowWriter = new RowSectionWriter();
    this.familyWriter = new ColumnSectionWriter();
    this.qualifierWriter = new ColumnSectionWriter();

    reset(outputStream, includeMvccVersion);
  }

  /**
   * Prepare this (possibly pooled) encoder for a fresh block: clear every accumulator and point
   * the encoder at the new output stream. Arrays keep their grown capacity between resets.
   */
  public void reset(OutputStream outputStream, boolean includeMvccVersion) {
    ++numResets;
    this.includeMvccVersion = includeMvccVersion;
    this.outputStream = outputStream;
    valueOffsets[0] = 0;

    familyDeduplicator.reset();
    qualifierDeduplicator.reset();
    rowTokenizer.reset();
    timestampEncoder.reset();
    mvccVersionEncoder.reset();
    cellTypeEncoder.reset();
    familyTokenizer.reset();
    qualifierTokenizer.reset();
    rowWriter.reset();
    familyWriter.reset();
    qualifierWriter.reset();

    totalCells = 0;
    totalUnencodedBytes = 0;
    totalValueBytes = 0;
    maxValueLength = 0;
    totalBytes = 0;
  }

  /**
   * Check that the arrays used to hold cell fragments are large enough for the cell that is being
   * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
   * first few block encodings but should stabilize quickly.
   */
  protected void ensurePerCellCapacities() {
    int currentCapacity = valueOffsets.length;
    int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe
    if (neededCapacity < currentCapacity) {
      return;
    }

    int padding = neededCapacity;//this will double the array size
    timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding);
    mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding);
    typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding);
    valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding);
  }

  /******************** CellOutputStream methods *************************/

  /**
   * Note: Unused until support is added to the scanner/heap
   * <p/>
   * The following method are optimized versions of write(Cell cell). The result should be
   * identical, however the implementation may be able to execute them much more efficiently because
   * it does not need to compare the unchanged fields with the previous cell's.
   * <p/>
   * Consider the benefits during compaction when paired with a CellScanner that is also aware of
   * row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells
   * to the write(Cell cell) method.
   * <p/>
   * The savings of skipping duplicate row detection are significant with long row keys. A
   * DataBlockEncoder may store a row key once in combination with a count of how many cells are in
   * the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment
   * of the counter, and that is for every cell in the row.
   */

  /**
   * Add a Cell to the output stream but repeat the previous row.
   */
  //@Override
  public void writeWithRepeatRow(Cell cell) {
    ensurePerCellCapacities();//can we optimize away some of this?

    //save a relatively expensive row comparison, incrementing the row's counter instead
    rowTokenizer.incrementNumOccurrencesOfLatestValue();
    addFamilyPart(cell);
    addQualifierPart(cell);
    addAfterRowFamilyQualifier(cell);
  }

  /**
   * Copy every field of the cell into the accumulation structures. Cells must arrive in sorted
   * order (see class javadoc).
   */
  @Override
  public void write(Cell cell) {
    ensurePerCellCapacities();

    rowTokenizer.addSorted(CellTool.fillRowRange(cell, rowRange));
    addFamilyPart(cell);
    addQualifierPart(cell);
    addAfterRowFamilyQualifier(cell);
  }

  /***************** internal add methods ************************/

  /**
   * Record the non-key fields of the cell: timestamp, mvccVersion, type byte, and value bytes.
   * Also updates the per-block counters.
   */
  private void addAfterRowFamilyQualifier(Cell cell){
    // timestamps
    timestamps[totalCells] = cell.getTimestamp();
    timestampEncoder.add(cell.getTimestamp());

    // memstore timestamps
    if (includeMvccVersion) {
      mvccVersions[totalCells] = cell.getMvccVersion();
      mvccVersionEncoder.add(cell.getMvccVersion());
      totalUnencodedBytes += WritableUtils.getVIntSize(cell.getMvccVersion());
    }else{
      //must overwrite in case there was a previous version in this array slot
      mvccVersions[totalCells] = 0L;
      if(totalCells == 0){//only need to do this for the first cell added
        mvccVersionEncoder.add(0L);
      }
      //totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled
    }

    // types
    typeBytes[totalCells] = cell.getTypeByte();
    cellTypeEncoder.add(cell.getTypeByte());

    // values
    totalValueBytes += cell.getValueLength();
    // double the array each time we run out of space
    values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes);
    CellTool.copyValueTo(cell, values, valueOffsets[totalCells]);
    if (cell.getValueLength() > maxValueLength) {
      maxValueLength = cell.getValueLength();
    }
    valueOffsets[totalCells + 1] = totalValueBytes;

    // general
    totalUnencodedBytes += KeyValueTool.length(cell);
    ++totalCells;
  }

  /**
   * Collect the cell's family for de-duplication. Since multiple families per block are not yet
   * possible, only the first cell's family is recorded.
   */
  private void addFamilyPart(Cell cell) {
    if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) {
      CellTool.fillFamilyRange(cell, familyRange);
      familyDeduplicator.add(familyRange);
    }
  }

  /** Collect the cell's qualifier for de-duplication. */
  private void addQualifierPart(Cell cell) {
    CellTool.fillQualifierRange(cell, qualifierRange);
    qualifierDeduplicator.add(qualifierRange);
  }

  /****************** compiling/flushing ********************/

  /**
   * Expensive method.  The second half of the encoding work happens here.
   *
   * Take all the separate accumulated data structures and turn them into a single stream of bytes
   * which is written to the outputStream.
   */
  @Override
  public void flush() throws IOException {
    compile();

    // do the actual flushing to the output stream.  Order matters.
    blockMeta.writeVariableBytesToOutputStream(outputStream);
    rowWriter.writeBytes(outputStream);
    familyWriter.writeBytes(outputStream);
    qualifierWriter.writeBytes(outputStream);
    timestampEncoder.writeBytes(outputStream);
    mvccVersionEncoder.writeBytes(outputStream);
    //CellType bytes are in the row nodes.  there is no additional type section
    outputStream.write(values, 0, totalValueBytes);
  }

  /**
   * Now that all the cells have been added, do the work to reduce them to a series of byte[]
   * fragments that are ready to be written to the output stream.
   */
  protected void compile(){
    blockMeta.setNumKeyValueBytes(totalUnencodedBytes);
    int lastValueOffset = valueOffsets[totalCells];
    blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
    blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
    blockMeta.setNumValueBytes(totalValueBytes);
    totalBytes += totalValueBytes;

    //these compile methods will add to totalBytes
    compileTypes();
    compileMvccVersions();
    compileTimestamps();
    compileQualifiers();
    compileFamilies();
    compileRows();

    int numMetaBytes = blockMeta.calculateNumMetaBytes();
    blockMeta.setNumMetaBytes(numMetaBytes);
    totalBytes += numMetaBytes;
  }

  /**
   * The following "compile" methods do any intermediate work necessary to transform the cell
   * fragments collected during the writing phase into structures that are ready to write to the
   * outputStream.
   * <p/>
   * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion.
   */

  protected void compileTypes() {
    blockMeta.setAllSameType(cellTypeEncoder.areAllSameType());
    if(cellTypeEncoder.areAllSameType()){
      blockMeta.setAllTypes(cellTypeEncoder.getOnlyType());
    }
  }

  protected void compileMvccVersions() {
    mvccVersionEncoder.compile();
    blockMeta.setMvccVersionFields(mvccVersionEncoder);
    int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength();
    totalBytes += numMvccVersionBytes;
  }

  protected void compileTimestamps() {
    timestampEncoder.compile();
    blockMeta.setTimestampFields(timestampEncoder);
    int numTimestampBytes = timestampEncoder.getOutputArrayLength();
    totalBytes += numTimestampBytes;
  }

  protected void compileQualifiers() {
    blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
    qualifierDeduplicator.compile();
    qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
    qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false);
    qualifierWriter.compile();
    int numQualifierBytes = qualifierWriter.getNumBytes();
    blockMeta.setNumQualifierBytes(numQualifierBytes);
    totalBytes += numQualifierBytes;
  }

  protected void compileFamilies() {
    blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
    familyDeduplicator.compile();
    familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
    familyWriter.reconstruct(blockMeta, familyTokenizer, true);
    familyWriter.compile();
    int numFamilyBytes = familyWriter.getNumBytes();
    blockMeta.setNumFamilyBytes(numFamilyBytes);
    totalBytes += numFamilyBytes;
  }

  protected void compileRows() {
    rowWriter.reconstruct(this);
    rowWriter.compile();
    int numRowBytes = rowWriter.getNumBytes();
    blockMeta.setNumRowBytes(numRowBytes);
    blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth());
    totalBytes += numRowBytes;
  }

  /********************* convenience getters ********************************/

  public long getValueOffset(int index) {
    return valueOffsets[index];
  }

  /** Length of the index-th cell's value, derived from adjacent value offsets. */
  public int getValueLength(int index) {
    return (int) (valueOffsets[index + 1] - valueOffsets[index]);
  }

  /************************* get/set *************************************/

  public PrefixTreeBlockMeta getBlockMeta() {
    return blockMeta;
  }

  public Tokenizer getRowTokenizer() {
    return rowTokenizer;
  }

  public LongEncoder getTimestampEncoder() {
    return timestampEncoder;
  }

  public int getTotalBytes() {
    return totalBytes;
  }

  public long[] getTimestamps() {
    return timestamps;
  }

  public long[] getMvccVersions() {
    return mvccVersions;
  }

  public byte[] getTypeBytes() {
    return typeBytes;
  }

  public LongEncoder getMvccVersionEncoder() {
    return mvccVersionEncoder;
  }

  public ByteRangeSet getFamilySorter() {
    return familyDeduplicator;
  }

  public ByteRangeSet getQualifierSorter() {
    return qualifierDeduplicator;
  }

  public ColumnSectionWriter getFamilyWriter() {
    return familyWriter;
  }

  public ColumnSectionWriter getQualifierWriter() {
    return qualifierWriter;
  }

  public RowSectionWriter getRowWriter() {
    return rowWriter;
  }

  /** View over the value bytes accumulated so far (backed by the internal buffer, not a copy). */
  public ByteRange getValueByteRange() {
    return new ByteRange(values, 0, totalValueBytes);
  }
}

View File

@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
/**
* Pool to enable reusing the Encoder objects which can consist of thousands of smaller objects and
* would be more garbage than the data in the block. A new encoder is needed for each block in
* a flush, compaction, RPC response, etc.
*
* It is not a pool in the traditional sense, but implements the semantics of a traditional pool
* via ThreadLocals to avoid sharing between threads. Sharing between threads would not be
* very expensive given that it's accessed per-block, but this is just as easy.
*
* This pool implementation assumes there is a one-to-one mapping between a single thread and a
* single flush or compaction.
*/
@InterfaceAudience.Private
public class ThreadLocalEncoderPool implements EncoderPool {

  /** One encoder cached per thread; empty until the thread's first checkOut. */
  private static final ThreadLocal<PrefixTreeEncoder> ENCODER
      = new ThreadLocal<PrefixTreeEncoder>();

  /**
   * Hand out the calling thread's cached encoder, creating one on the thread's first call.
   * The encoder is (re)prepared for the given output stream before being returned.
   */
  @Override
  public PrefixTreeEncoder checkOut(OutputStream os, boolean includeMvccVersion) {
    PrefixTreeEncoder encoder = EncoderFactory.prepareEncoder(ENCODER.get(), os,
        includeMvccVersion);
    ENCODER.set(encoder);
    return encoder;
  }

  /**
   * Intentionally a no-op: the encoder remains attached to its thread after checkOut, so
   * there is nothing to return to the pool.
   */
  @Override
  public void checkIn(PrefixTreeEncoder encoder) {
    // attached to thread on checkOut; nothing to detach here
  }
}

View File

@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.column;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.util.vint.UFIntTool;
import org.apache.hbase.util.vint.UVIntTool;
/**
* Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly.
* The family and qualifier sections of the data block are made of 1 or more of these nodes.
* <p/>
* Each node is composed of 3 sections:<br/>
* <li>tokenLength: UVInt (normally 1 byte) indicating the number of token bytes
* <li>token[]: the actual token bytes
* <li>parentStartPosition: the offset of the next node from the start of the family or qualifier
* section
*/
@InterfaceAudience.Private
public class ColumnNodeWriter {

  /************* fields ****************************/

  protected TokenizerNode builderNode;
  protected PrefixTreeBlockMeta blockMeta;
  // true when this node belongs to the family section, false for the qualifier section
  protected boolean familyVsQualifier;

  protected int tokenLength;
  protected byte[] token;
  // offset of this node's parent from the start of the section; 0 marks the root
  protected int parentStartPosition;

  /*************** construct **************************/

  public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
      boolean familyVsQualifier) {
    this.blockMeta = blockMeta;
    this.builderNode = builderNode;
    this.familyVsQualifier = familyVsQualifier;
    calculateTokenLength();
  }

  /************* methods *******************************/

  public boolean isRoot() {
    return parentStartPosition == 0;
  }

  /** Cache the token length and allocate the buffer that setTokenBytes will fill. */
  private void calculateTokenLength() {
    tokenLength = builderNode.getTokenLength();
    token = new byte[tokenLength];
  }

  /**
   * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a
   * placeholder.
   * @param offsetWidthPlaceholder the placeholder
   * @return node width
   */
  public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) {
    int width = 0;
    width += UVIntTool.numBytes(tokenLength);
    width += token.length;
    width += offsetWidthPlaceholder;
    return width;
  }

  /**
   * Serialize this node: UVInt token length, raw token bytes, then the parent offset as a
   * fixed-width UFInt whose width was decided for the whole section by ColumnSectionWriter.
   */
  public void writeBytes(OutputStream os) throws IOException {
    int parentOffsetWidth;
    if (familyVsQualifier) {
      parentOffsetWidth = blockMeta.getFamilyOffsetWidth();
    } else {
      parentOffsetWidth = blockMeta.getQualifierOffsetWidth();
    }
    UVIntTool.writeBytes(tokenLength, os);
    os.write(token);
    UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os);
  }

  /** Copy this node's token bytes out of the tokenizer's backing array into {@code token}. */
  public void setTokenBytes(ByteRange source) {
    source.deepCopySubRangeTo(0, tokenLength, token, 0);
  }

  /****************** standard methods ************************/

  @Override
  public String toString() {
    // chain appends rather than concatenating inside append() (same output, no throwaway
    // intermediate Strings, which defeated the purpose of the StringBuilder)
    StringBuilder sb = new StringBuilder();
    sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3));
    sb.append(",");
    sb.append("[");
    sb.append(Bytes.toString(token));
    sb.append("]->");
    sb.append(parentStartPosition);
    return sb.toString();
  }

  /************************** get/set ***********************/

  public void setParentStartPosition(int parentStartPosition) {
    this.parentStartPosition = parentStartPosition;
  }
}

View File

@ -0,0 +1,201 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.column;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.util.vint.UFIntTool;
import com.google.common.collect.Lists;
/**
* Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family
* section is written after the row section, and qualifier section after family section.
* <p/>
* The family and qualifier tries, or "column tries", are structured differently than the row trie.
* The trie cannot be reassembled without external data about the offsets of the leaf nodes, and
* these external pointers are stored in the nubs and leaves of the row trie. For each cell in a
* row, the row trie contains a list of offsets into the column sections (along with pointers to
* timestamps and other per-cell fields). These offsets point to the last column node/token that
* comprises the column name. To assemble the column name, the trie is traversed in reverse (right
* to left), with the rightmost tokens pointing to the start of their "parent" node which is the
* node to the left.
* <p/>
* This choice was made to reduce the size of the column trie by storing the minimum amount of
* offset data. As a result, to find a specific qualifier within a row, you must do a binary search
* of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might
* encode the columns in both a forward and reverse trie, which would convert binary searches into
* more efficient trie searches which would be beneficial for wide rows.
*/
@InterfaceAudience.Private
public class ColumnSectionWriter {
public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
/****************** fields ****************************/
private PrefixTreeBlockMeta blockMeta;
// true when writing the family section, false for the qualifier section
private boolean familyVsQualifier;
private Tokenizer tokenizer;
// total serialized size of this section, computed by compile()
private int numBytes = 0;
private ArrayList<TokenizerNode> nonLeaves;
private ArrayList<TokenizerNode> leaves;
// nonLeaves followed by leaves; rebuilt each compile()
private ArrayList<TokenizerNode> allNodes;
private ArrayList<ColumnNodeWriter> columnNodeWriters;
private List<Integer> outputArrayOffsets;
/*********************** construct *********************/
public ColumnSectionWriter() {
this.nonLeaves = Lists.newArrayList();
this.leaves = Lists.newArrayList();
this.outputArrayOffsets = Lists.newArrayList();
}
public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
boolean familyVsQualifier) {
this();// init collections
reconstruct(blockMeta, builder, familyVsQualifier);
}
/** Re-point this (reusable) writer at a new block without reallocating collections. */
public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
boolean familyVsQualifier) {
this.blockMeta = blockMeta;
this.tokenizer = builder;
this.familyVsQualifier = familyVsQualifier;
}
// NOTE(review): allNodes/columnNodeWriters are not cleared here; compile() reassigns them
// on each run, so stale references only persist between reset() and the next compile().
public void reset() {
numBytes = 0;
nonLeaves.clear();
leaves.clear();
outputArrayOffsets.clear();
}
/****************** methods *******************************/
/**
 * Flatten the tokenized column trie into its serialized layout: collect nodes, size each
 * node, determine the fixed parent-offset width by trial, then assign each node its final
 * position in the output array. Must be called before writeBytes(..).
 */
public ColumnSectionWriter compile() {
if (familyVsQualifier) {
// do nothing. max family length fixed at Byte.MAX_VALUE
} else {
blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
}
tokenizer.setNodeFirstInsertionIndexes();
tokenizer.appendNodes(nonLeaves, true, false);
tokenizer.appendNodes(leaves, false, true);
allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
allNodes.addAll(nonLeaves);
allNodes.addAll(leaves);
columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
for (int i = 0; i < allNodes.size(); ++i) {
TokenizerNode node = allNodes.get(i);
columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier));
}
// leaf widths are known at this point, so add them up
int totalBytesWithoutOffsets = 0;
for (int i = allNodes.size() - 1; i >= 0; --i) {
ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
// leaves store all but their first token byte
totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
}
// figure out how wide our offset FInts are
// Try 1 byte, then 2, ... The total section size grows with the offset width, so keep
// widening until every offset within the section fits in that many bytes.
int parentOffsetWidth = 0;
while (true) {
++parentOffsetWidth;
int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
numBytes = numBytesFinder;
break;
}// it fits
}
if (familyVsQualifier) {
blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
} else {
blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
}
// Assign each node its byte offset in the output array (forward order), wire up the token
// bytes, and point each non-root node at its parent's offset. Parents precede children in
// allNodes, so a parent's offset is already final when a child reads it.
int forwardIndex = 0;
for (int i = 0; i < allNodes.size(); ++i) {
TokenizerNode node = allNodes.get(i);
ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
int fullNodeWidth = columnNodeWriter
.getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
node.setOutputArrayOffset(forwardIndex);
columnNodeWriter.setTokenBytes(node.getToken());
if (node.isRoot()) {
columnNodeWriter.setParentStartPosition(0);
} else {
columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
}
forwardIndex += fullNodeWidth;
}
tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
return this;
}
/** Stream every compiled node, in output order, to the block being written. */
public void writeBytes(OutputStream os) throws IOException {
for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
columnNodeWriter.writeBytes(os);
}
}
/************* get/set **************************/
public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
return columnNodeWriters;
}
public int getNumBytes() {
return numBytes;
}
/** Byte offset within this section of the sortedIndex'th column's terminal node. */
public int getOutputArrayOffset(int sortedIndex) {
return outputArrayOffsets.get(sortedIndex);
}
public ArrayList<TokenizerNode> getNonLeaves() {
return nonLeaves;
}
public ArrayList<TokenizerNode> getLeaves() {
return leaves;
}
}

View File

@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.other;
import org.apache.hadoop.classification.InterfaceAudience;
/**
* Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each
* KV. If(allSameType) during conversion to byte[], then we can store the "onlyType" in blockMeta,
* therefore not repeating it for each cell and saving 1 byte per cell.
*/
@InterfaceAudience.Private
public class CellTypeEncoder {

  /************* fields *********************/

  // no type byte seen yet since construction or the last reset
  protected boolean pendingFirstType = true;
  // remains true while every added type matches the first one seen
  protected boolean allSameType = true;
  protected byte onlyType;

  /************* construct *********************/

  /** Restore the initial state so this encoder can be reused for the next block. */
  public void reset() {
    pendingFirstType = true;
    allSameType = true;
  }

  /************* methods *************************/

  /** Record one cell's type byte; flips allSameType off at the first mismatch. */
  public void add(byte type) {
    if (!pendingFirstType) {
      if (type != onlyType) {
        allSameType = false;
      }
    } else {
      pendingFirstType = false;
      onlyType = type;
    }
  }

  /**************** get/set **************************/

  public boolean areAllSameType() {
    return allSameType;
  }

  public byte getOnlyType() {
    return onlyType;
  }
}

View File

@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.other;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hbase.util.vint.UFIntTool;
import com.google.common.base.Joiner;
/**
* Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
* collection of Cells.
*
* 1. add longs to a HashSet for fast de-duplication
* 2. keep track of the min and max
* 3. copy all values to a new long[]
* 4. Collections.sort the long[]
* 5. calculate maxDelta = max - min
* 6. determine FInt width based on maxDelta
* 7. PrefixTreeEncoder binary searches to find index of each value
*/
@InterfaceAudience.Private
public class LongEncoder {
/****************** fields ****************************/
// raw values as added, pre-sort/dedup
protected HashSet<Long> uniqueValues;
// compile() output: ascending, no duplicates
protected long[] sortedUniqueValues;
protected long min, max, maxDelta;
// fixed width of each serialized (value - min) delta; 0 when all values are equal
protected int bytesPerDelta;
// fixed width needed to store an index into sortedUniqueValues
protected int bytesPerIndex;
protected int totalCompressedBytes;
/****************** construct ****************************/
public LongEncoder() {
this.uniqueValues = new HashSet<Long>();
}
public void reset() {
uniqueValues.clear();
sortedUniqueValues = null;
min = Long.MAX_VALUE;
max = Long.MIN_VALUE;
maxDelta = Long.MIN_VALUE;
bytesPerIndex = 0;
bytesPerDelta = 0;
totalCompressedBytes = 0;
}
/************* methods ***************************/
public void add(long timestamp) {
uniqueValues.add(timestamp);
}
/**
 * Sort the de-duplicated values and size the delta encoding. Must be called before
 * getIndex/getDelta/writeBytes.
 * NOTE(review): an empty uniqueValues set falls through to ArrayUtils.getFirst on a
 * zero-length array — callers appear to guarantee at least one add() before compile();
 * confirm.
 */
public LongEncoder compile() {
int numUnique = uniqueValues.size();
if (numUnique == 1) {
// single distinct value: store it as min; delta/index widths stay 0 from reset()
min = CollectionUtils.getFirst(uniqueValues);
sortedUniqueValues = new long[] { min };
return this;
}
sortedUniqueValues = new long[numUnique];
int lastIndex = -1;
for (long value : uniqueValues) {
sortedUniqueValues[++lastIndex] = value;
}
Arrays.sort(sortedUniqueValues);
min = ArrayUtils.getFirst(sortedUniqueValues);
max = ArrayUtils.getLast(sortedUniqueValues);
maxDelta = max - min;
if (maxDelta > 0) {
bytesPerDelta = UFIntTool.numBytes(maxDelta);
} else {
bytesPerDelta = 0;
}
int maxIndex = numUnique - 1;
bytesPerIndex = UFIntTool.numBytes(maxIndex);
totalCompressedBytes = numUnique * bytesPerDelta;
return this;
}
/** Delta of the index'th sorted value from the minimum; 0 when nothing was stored. */
public long getDelta(int index) {
if (sortedUniqueValues.length == 0) {
return 0;
}
return sortedUniqueValues[index] - min;
}
public int getIndex(long value) {
// should always find an exact match
return Arrays.binarySearch(sortedUniqueValues, value);
}
/** Write each value's delta-from-min as a fixed-width UFInt, in ascending value order. */
public void writeBytes(OutputStream os) throws IOException {
for (int i = 0; i < sortedUniqueValues.length; ++i) {
long delta = sortedUniqueValues[i] - min;
UFIntTool.writeBytes(bytesPerDelta, delta, os);
}
}
//convenience method for tests
public byte[] getByteArray() throws IOException{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
writeBytes(baos);
return baos.toByteArray();
}
public int getOutputArrayLength() {
return sortedUniqueValues.length * bytesPerDelta;
}
public int getNumUniqueValues() {
return sortedUniqueValues.length;
}
/******************* Object methods **********************/
@Override
public String toString() {
if (ArrayUtils.isEmpty(sortedUniqueValues)) {
return "[]";
}
return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
}
/******************** get/set **************************/
public long getMin() {
return min;
}
public int getBytesPerDelta() {
return bytesPerDelta;
}
public int getBytesPerIndex() {
return bytesPerIndex;
}
public int getTotalCompressedBytes() {
return totalCompressedBytes;
}
public long[] getSortedUniqueTimestamps() {
return sortedUniqueValues;
}
}

View File

@ -0,0 +1,285 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.row;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRangeTool;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.util.vint.UFIntTool;
import org.apache.hbase.util.vint.UVIntTool;
/**
* Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
* Please see the write() method for the order in which data is written.
*/
@InterfaceAudience.Private
public class RowNodeWriter{
protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);
/********************* fields ******************************/
protected PrefixTreeEncoder prefixTreeEncoder;
protected PrefixTreeBlockMeta blockMeta;
// the trie node (branch, nub, or leaf) this writer serializes
protected TokenizerNode tokenizerNode;
// number of token bytes actually written (root keeps all; others drop their first byte)
protected int tokenWidth;
// number of children, which is also the number of fan bytes written
protected int fanOut;
// cells anchored at this node; 0 for branch nodes
protected int numCells;
// total serialized width of this node, cached by calculateWidth*
protected int width;
/*********************** construct *************************/
public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
reconstruct(keyValueBuilder, tokenizerNode);
}
public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
this.prefixTreeEncoder = prefixTreeEncoder;
reset(tokenizerNode);
}
/** Re-point this (reusable) writer at a new trie node and recompute its dimensions. */
public void reset(TokenizerNode node) {
this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks
this.tokenizerNode = node;
this.tokenWidth = 0;
this.fanOut = 0;
this.numCells = 0;
this.width = 0;
calculateOffsetsAndLengths();
}
/********************* methods ****************************/
protected void calculateOffsetsAndLengths(){
tokenWidth = tokenizerNode.getTokenLength();
if(!tokenizerNode.isRoot()){
--tokenWidth;//root has no parent
}
fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
numCells = tokenizerNode.getNumOccurrences();
}
public int calculateWidth(){
calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
return width;
}
/**
 * Compute this node's serialized width using the supplied child-offset width (pass 0 while
 * the final width is still being determined by RowSectionWriter). Mirrors the field order
 * written by write(..).
 */
public int calculateWidthOverrideOffsetWidth(int offsetWidth){
width = 0;
width += UVIntTool.numBytes(tokenWidth);
width += tokenWidth;
width += UVIntTool.numBytes(fanOut);
width += fanOut;
width += UVIntTool.numBytes(numCells);
if(tokenizerNode.hasOccurrences()){
int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
+ blockMeta.getQualifierOffsetWidth()
+ blockMeta.getTimestampIndexWidth()
+ blockMeta.getMvccVersionIndexWidth()
+ blockMeta.getKeyValueTypeWidth()
+ blockMeta.getValueOffsetWidth()
+ blockMeta.getValueLengthWidth();
width += numCells * fixedBytesPerCell;
}
if( ! tokenizerNode.isLeaf()){
width += fanOut * offsetWidth;
}
return width;
}
/*********************** writing the compiled structure to the OutputStream ***************/
public void write(OutputStream os) throws IOException{
//info about this row trie node
writeRowToken(os);
writeFan(os);
writeNumCells(os);
//UFInt indexes and offsets for each cell in the row (if nub or leaf)
writeFamilyNodeOffsets(os);
writeQualifierNodeOffsets(os);
writeTimestampIndexes(os);
writeMvccVersionIndexes(os);
writeCellTypes(os);
writeValueOffsets(os);
writeValueLengths(os);
//offsets to the children of this row trie node (if branch or nub)
writeNextRowTrieNodeOffsets(os);
}
/**
 * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
 * fields can reproduce all the row keys that compose the block.
 */
/**
 * UVInt: tokenWidth
 * bytes: token
 */
protected void writeRowToken(OutputStream os) throws IOException {
UVIntTool.writeBytes(tokenWidth, os);
// non-root nodes skip their first token byte: it is stored as the parent's fan byte
int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex);
}
/**
 * UVInt: numFanBytes/fanOut
 * bytes: each fan byte
 */
public void writeFan(OutputStream os) throws IOException {
UVIntTool.writeBytes(fanOut, os);
if (fanOut <= 0) {
return;
}
ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
os.write(child.getToken().get(0));// first byte of each child's token
}
}
/**
 * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes
 */
protected void writeNumCells(OutputStream os) throws IOException {
UVIntTool.writeBytes(numCells, os);
}
/**
 * The following methods write data for each cell in the row, mostly consisting of indexes or
 * offsets into the timestamp/column data structures that are written in the middle of the block.
 * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
 * search of a particular column/timestamp combination.
 * <p/>
 * Branch nodes will not have any data in these sections.
 */
protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
if (blockMeta.getFamilyOffsetWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
// NOTE(review): MULITPLE_FAMILIES_POSSIBLE is a (misspelled) constant on
// PrefixTreeEncoder; when false every cell shares insertion index 0.
int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
.getFirstInsertionIndex() + i : 0;
int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
cellInsertionIndex);
int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
sortedIndex);
UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
}
}
protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
if (blockMeta.getQualifierOffsetWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
cellInsertionIndex);
int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
sortedIndex);
UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
}
}
protected void writeTimestampIndexes(OutputStream os) throws IOException {
if (blockMeta.getTimestampIndexWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
}
}
protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
if (blockMeta.getMvccVersionIndexWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
}
}
protected void writeCellTypes(OutputStream os) throws IOException {
// skipped entirely when every cell in the block shares one type (stored in blockMeta)
if (blockMeta.isAllSameType()) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
}
}
protected void writeValueOffsets(OutputStream os) throws IOException {
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
}
}
protected void writeValueLengths(OutputStream os) throws IOException {
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
}
}
/**
 * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
 */
protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
// children are addressed relative to this node via their negative (from-end) indexes
int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
}
}
}

View File

@ -0,0 +1,219 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.row;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.util.vint.UFIntTool;
import com.google.common.collect.Lists;
/**
* Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
* key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
* also contains references to offsets in the other sections of the data block that enable the
* decoder to match a row key with its qualifier, timestamp, type, value, etc.
* <p>
* The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
* internals of each row node.
*/
@InterfaceAudience.Private
public class RowSectionWriter {
/***************** fields **************************/
protected PrefixTreeEncoder prefixTreeEncoder;
protected PrefixTreeBlockMeta blockMeta;
protected int numBytes;
protected ArrayList<TokenizerNode> nonLeaves;
protected ArrayList<TokenizerNode> leaves;
protected ArrayList<RowNodeWriter> leafWriters;
protected ArrayList<RowNodeWriter> nonLeafWriters;
protected int numLeafWriters;
protected int numNonLeafWriters;
/********************* construct **********************/

public RowSectionWriter() {
  this.nonLeaves = Lists.newArrayList();
  this.leaves = Lists.newArrayList();
  this.leafWriters = Lists.newArrayList();
  this.nonLeafWriters = Lists.newArrayList();
}

public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
  // BUGFIX: must initialize the collections before reconstruct(..), which calls reset()
  // and would NPE on nonLeaves.clear(). Mirrors ColumnSectionWriter's two-arg constructor.
  this();// init collections
  reconstruct(prefixTreeEncoder);
}

/** Re-point this (reusable) writer at a new encoder/block and clear per-block state. */
public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
  this.prefixTreeEncoder = prefixTreeEncoder;
  this.blockMeta = prefixTreeEncoder.getBlockMeta();
  reset();
}

public void reset() {
  numBytes = 0;
  nonLeaves.clear();
  leaves.clear();
  // writer lists are kept for reuse; only the live counts are rewound
  numLeafWriters = 0;
  numNonLeafWriters = 0;
}
/****************** methods *******************************/
public RowSectionWriter compile() {
blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
// track the starting position of each node in final output
int negativeIndex = 0;
// create leaf writer nodes
// leaf widths are known at this point, so add them up
int totalLeafBytes = 0;
for (int i = leaves.size() - 1; i >= 0; --i) {
TokenizerNode leaf = leaves.get(i);
RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
++numLeafWriters;
// leaves store all but their first token byte
int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
totalLeafBytes += leafNodeWidth;
negativeIndex += leafNodeWidth;
leaf.setNegativeIndex(negativeIndex);
}
int totalNonLeafBytesWithoutOffsets = 0;
int totalChildPointers = 0;
for (int i = nonLeaves.size() - 1; i >= 0; --i) {
TokenizerNode nonLeaf = nonLeaves.get(i);
RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
++numNonLeafWriters;
totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
totalChildPointers += nonLeaf.getNumChildren();
}
// figure out how wide our offset FInts are
int offsetWidth = 0;
while (true) {
++offsetWidth;
int offsetBytes = totalChildPointers * offsetWidth;
int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
// it fits
numBytes = totalRowBytes;
break;
}
}
blockMeta.setNextNodeOffsetWidth(offsetWidth);
// populate negativeIndexes
for (int i = nonLeaves.size() - 1; i >= 0; --i) {
TokenizerNode nonLeaf = nonLeaves.get(i);
int writerIndex = nonLeaves.size() - i - 1;
RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
int nodeWidth = nonLeafWriter.calculateWidth();
negativeIndex += nodeWidth;
nonLeaf.setNegativeIndex(negativeIndex);
}
return this;
}
protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
TokenizerNode builderNode) {
RowNodeWriter rowNodeWriter = null;
//check if there is an existing node we can recycle
if (index >= list.size()) {
//there are not enough existing nodes, so add a new one which will be retrieved below
list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
}
rowNodeWriter = list.get(index);
rowNodeWriter.reset(builderNode);
return rowNodeWriter;
}
public void writeBytes(OutputStream os) throws IOException {
for (int i = numNonLeafWriters - 1; i >= 0; --i) {
RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
nonLeafWriter.write(os);
}
// duplicates above... written more for clarity right now
for (int i = numLeafWriters - 1; i >= 0; --i) {
RowNodeWriter leafWriter = leafWriters.get(i);
leafWriter.write(os);
}
}
/***************** static ******************************/
protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
ArrayList<TokenizerNode> ins, boolean leaves) {
ArrayList<TokenizerNode> outs = Lists.newArrayList();
for (int i = ins.size() - 1; i >= 0; --i) {
TokenizerNode n = ins.get(i);
if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) {
outs.add(ins.get(i));
}
}
return outs;
}
/************* get/set **************************/
public int getNumBytes() {
return numBytes;
}
public ArrayList<TokenizerNode> getNonLeaves() {
return nonLeaves;
}
public ArrayList<TokenizerNode> getLeaves() {
return leaves;
}
public ArrayList<RowNodeWriter> getNonLeafWriters() {
return nonLeafWriters;
}
public ArrayList<RowNodeWriter> getLeafWriters() {
return leafWriters;
}
}

View File

@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.tokenize;
import java.util.Comparator;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * Determines order of nodes in the output array: non-leaves before leaves, non-leaves ordered by
 * trie depth (token offset), ties broken by unique node id. Maybe possible to optimize further.
 */
@InterfaceAudience.Private
public class TokenDepthComparator implements Comparator<TokenizerNode> {

  @Override
  public int compare(TokenizerNode a, TokenizerNode b) {
    if (a == null) {
      throw new IllegalArgumentException("a cannot be null");
    }
    if (b == null) {
      throw new IllegalArgumentException("b cannot be null");
    }
    // put leaves at the end
    if (!a.isLeaf() && b.isLeaf()) {
      return -1;
    }
    if (a.isLeaf() && !b.isLeaf()) {
      return 1;
    }
    if (a.isLeaf() && b.isLeaf()) {
      // keep leaves in sorted order (for debugability). Use Long.compare so that
      // compare(x, x) == 0; the previous "a.getId() < b.getId() ? -1 : 1" returned 1 when the
      // ids were equal (e.g. comparing a node to itself), violating the Comparator contract.
      return Long.compare(a.getId(), b.getId());
    }
    // compare depth
    int depthComparison = Integer.compare(a.getTokenOffset(), b.getTokenOffset());
    if (depthComparison != 0) {
      return depthComparison;
    }
    // if same depth, return lower id first. ids are unique
    return Long.compare(a.getId(), b.getId());
  }
}

View File

@ -0,0 +1,239 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.tokenize;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import com.google.common.collect.Lists;
/**
 * Data structure used in the first stage of PrefixTree encoding:
 * <li>accepts a sorted stream of ByteRanges
 * <li>splits them into a set of tokens, each held by a {@link TokenizerNode}
 * <li>connects the TokenizerNodes via standard java references
 * <li>keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content
 * <p><br>
 * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier
 * encoding.
 */
@InterfaceAudience.Private
public class Tokenizer{

  /***************** fields **************************/

  // count of byte[]s fed in via addSorted/addAll, duplicates included
  protected int numArraysAdded = 0;

  // last id handed out by nextNodeId(); -1 means none assigned yet
  protected long lastNodeId = -1;

  // pool of recyclable nodes; only the first numNodes entries are live for the current build
  protected ArrayList<TokenizerNode> nodes;
  protected int numNodes;

  protected TokenizerNode root;

  // single shared backing array holding every node's token bytes; grown on demand, and only the
  // first tokensLength bytes are valid
  protected byte[] tokens;
  protected int tokensLength;

  // length of the longest input byte[] seen so far
  protected int maxElementLength = 0;
  // number of levels in the tree assuming root level is 0
  protected int treeDepth = 0;

  /******************* construct *******************/

  public Tokenizer() {
    this.nodes = Lists.newArrayList();
    this.tokens = new byte[0];
  }

  /**
   * Clear per-build state so this Tokenizer can be reused. The node pool and token array are
   * intentionally retained for recycling; counters are zeroed instead.
   */
  public void reset() {
    numArraysAdded = 0;
    lastNodeId = -1;
    numNodes = 0;
    tokensLength = 0;
    root = null;
    maxElementLength = 0;
    treeDepth = 0;
  }

  /***************** building *************************/

  /**
   * Add a pre-sorted list of inputs, in order.
   */
  public void addAll(ArrayList<ByteRange> sortedByteRanges) {
    for (int i = 0; i < sortedByteRanges.size(); ++i) {
      ByteRange byteRange = sortedByteRanges.get(i);
      addSorted(byteRange);
    }
  }

  /**
   * Add one input. Inputs must arrive in sorted order; each is routed into the trie starting at
   * the root (which is created lazily from the first input).
   */
  public void addSorted(final ByteRange bytes) {
    ++numArraysAdded;
    if (bytes.getLength() > maxElementLength) {
      maxElementLength = bytes.getLength();
    }
    if (root == null) {
      // nodeDepth of firstNode (non-root) is 1
      root = addNode(null, 1, 0, bytes, 0);
    } else {
      root.addSorted(bytes);
    }
  }

  /**
   * Record a repeat of the most recently added value without re-walking the trie.
   */
  public void incrementNumOccurrencesOfLatestValue(){
    CollectionUtils.getLast(nodes).incrementNumOccurrences(1);
  }

  protected long nextNodeId() {
    return ++lastNodeId;
  }

  /**
   * Create (or recycle from the pool) a TokenizerNode holding the tail of {@code token} starting
   * at {@code inputTokenOffset}, after copying those bytes into the shared tokens array.
   */
  protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset,
      final ByteRange token, int inputTokenOffset) {
    int inputTokenLength = token.getLength() - inputTokenOffset;
    int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset);
    TokenizerNode node = null;
    if (nodes.size() <= numNodes) {
      // pool exhausted; allocate a new node and keep it for future recycling
      node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset,
          inputTokenLength);
      nodes.add(node);
    } else {
      // recycle an existing pooled node
      node = nodes.get(numNodes);
      node.reset();
      node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength);
    }
    ++numNodes;
    return node;
  }

  /**
   * Append the tail of {@code token} to the shared tokens array (growing it if necessary) and
   * return the offset at which the copy begins.
   */
  protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) {
    int newOffset = tokensLength;
    int inputTokenLength = token.getLength() - inputTokenOffset;
    int newMinimum = tokensLength + inputTokenLength;
    // grow to at least 2x the required size to amortize future appends
    tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum);
    token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength);
    tokensLength += inputTokenLength;
    return newOffset;
  }

  /**
   * Called by nodes as they are created/deepened so the tokenizer tracks the max tree depth.
   */
  protected void submitMaxNodeDepthCandidate(int nodeDepth) {
    if (nodeDepth > treeDepth) {
      treeDepth = nodeDepth;
    }
  }

  /********************* read ********************/

  public int getNumAdded(){
    return numArraysAdded;
  }

  // for debugging
  public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves) {
    ArrayList<TokenizerNode> nodes = Lists.newArrayList();
    root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves);
    return nodes;
  }

  /**
   * Append the selected node types (depth-first, pre-order) to a caller-supplied list.
   */
  public void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves,
      boolean includeLeaves) {
    root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
  }

  /**
   * Reconstruct the full input byte[]s, including duplicates (one copy per occurrence).
   */
  public List<byte[]> getArrays() {
    List<TokenizerNode> nodes = new ArrayList<TokenizerNode>();
    root.appendNodesToExternalList(nodes, true, true);
    List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes));
    for (int i = 0; i < nodes.size(); ++i) {
      TokenizerNode node = nodes.get(i);
      for (int j = 0; j < node.getNumOccurrences(); ++j) {
        byte[] byteArray = node.getNewByteArray();
        byteArrays.add(byteArray);
      }
    }
    return byteArrays;
  }

  //currently unused, but working and possibly useful in the future
  public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
      int keyLength) {
    root.getNode(resultHolder, key, keyOffset, keyLength);
  }

  /********************** write ***************************/

  /**
   * Assign each nub/leaf its first insertion index (position of its first occurrence in the
   * flattened output ordering).
   * @return this, for chaining
   */
  public Tokenizer setNodeFirstInsertionIndexes() {
    root.setInsertionIndexes(0);
    return this;
  }

  public Tokenizer appendOutputArrayOffsets(List<Integer> offsets) {
    root.appendOutputArrayOffsets(offsets);
    return this;
  }

  /********************* print/debug ********************/

  protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false;

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getStructuralString());
    if (INCLUDE_FULL_TREE_IN_TO_STRING) {
      for (byte[] bytes : getArrays()) {
        if (sb.length() > 0) {
          sb.append("\n");
        }
        sb.append(Bytes.toString(bytes));
      }
    }
    return sb.toString();
  }

  /**
   * One line per node showing its branch/nub/leaf type, occurrence count, depth, and token.
   */
  public String getStructuralString() {
    List<TokenizerNode> nodes = getNodes(true, true);
    StringBuilder sb = new StringBuilder();
    for (TokenizerNode node : nodes) {
      String line = node.getPaddedTokenAndOccurrenceString();
      sb.append(line + "\n");
    }
    return sb.toString();
  }

  /****************** get/set ************************/

  public TokenizerNode getRoot() {
    return root;
  }

  public int getMaxElementLength() {
    return maxElementLength;
  }

  public int getTreeDepth() {
    return treeDepth;
  }
}

View File

@ -0,0 +1,632 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.tokenize;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.Strings;
import com.google.common.collect.Lists;
/**
 * Individual node in a Trie structure.  Each node is one of 3 types:
 * <li>Branch: an internal trie node that may have a token and must have multiple children, but does
 * not represent an actual input byte[], hence its numOccurrences is 0
 * <li>Leaf: a node with no children and where numOccurrences is &gt;= 1.  It's token represents the
 * last bytes in the input byte[]s.
 * <li>Nub: a combination of a branch and leaf.  Its token represents the last bytes of input
 * byte[]s and has numOccurrences &gt;= 1, but it also has child nodes which represent input byte[]s
 * that add bytes to this nodes input byte[].
 * <br/><br/>
 * Example inputs (numInputs=7):
 * 0: AAA
 * 1: AAA
 * 2: AAB
 * 3: AAB
 * 4: AAB
 * 5: AABQQ
 * 6: AABQQ
 * <br/><br/>
 * Resulting TokenizerNodes:
 * AA &lt;- branch, numOccurrences=0, tokenStartOffset=0, token.length=2
 * A  &lt;- leaf, numOccurrences=2, tokenStartOffset=2, token.length=1
 * B  &lt;- nub, numOccurrences=3, tokenStartOffset=2, token.length=1
 * QQ &lt;- leaf, numOccurrences=2, tokenStartOffset=3, token.length=2
 * <br/><br/>
 * numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2
 */
@InterfaceAudience.Private
public class TokenizerNode{

  /*
   * Ref to data structure wrapper
   */
  protected Tokenizer builder;

  /******************************************************************
   * Tree content/structure used during tokenization
   * ****************************************************************/

  /*
   * ref to parent trie node
   */
  protected TokenizerNode parent;

  /*
   * node depth in trie, irrespective of each node's token length
   */
  protected int nodeDepth;

  /*
   * start index of this token in original byte[]
   */
  protected int tokenStartOffset;

  /*
   * bytes for this trie node.  can be length 0 in root node
   */
  protected ByteRange token;

  /*
   * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for
   * nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing
   * that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode.
   */
  protected int numOccurrences;

  /*
   * The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256
   * child nodes.
   */
  protected ArrayList<TokenizerNode> children;


  /*
   * Fields used later in the encoding process for sorting the nodes into the order they'll be
   * written to the output byte[].  With these fields, the TokenizerNode and therefore Tokenizer
   * are not generic data structures but instead are specific to HBase PrefixTree encoding.
   */

  /*
   * unique id assigned to each TokenizerNode
   */
  protected long id;

  /*
   * set >=0 for nubs and leaves
   */
  protected int firstInsertionIndex = -1;

  /*
   * A positive value indicating how many bytes before the end of the block this node will start. If
   * the section is 55 bytes and negativeOffset is 9, then the node will start at 46.
   */
  protected int negativeIndex = 0;

  /*
   * The offset in the output array at which to start writing this node's token bytes.  Influenced
   * by the lengths of all tokens sorted before this one.
   */
  protected int outputArrayOffset = -1;


  /*********************** construct *****************************/

  public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
      int tokenStartOffset, int tokenOffset, int tokenLength) {
    // token ByteRange is allocated once here and re-pointed on every reconstruct()
    this.token = new ByteRange();
    reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
    this.children = Lists.newArrayList();
  }

  /*
   * Sub-constructor for initializing all fields without allocating a new object.  Used by the
   * regular constructor.
   */
  public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
      int tokenStartOffset, int tokenOffset, int tokenLength) {
    this.builder = builder;
    this.id = builder.nextNodeId();
    this.parent = parent;
    this.nodeDepth = nodeDepth;
    builder.submitMaxNodeDepthCandidate(nodeDepth);
    this.tokenStartOffset = tokenStartOffset;
    // token is a window into the builder's shared tokens byte[]
    this.token.set(builder.tokens, tokenOffset, tokenLength);
    this.numOccurrences = 1;
  }

  /*
   * Clear the state of this node so that it looks like it was just allocated.
   */
  public void reset() {
    builder = null;
    parent = null;
    nodeDepth = 0;
    tokenStartOffset = 0;
    token.clear();
    numOccurrences = 0;
    children.clear();// branches & nubs
    // ids/offsets. used during writing to byte[]
    id = 0;
    firstInsertionIndex = -1;// set >=0 for nubs and leaves
    negativeIndex = 0;
    outputArrayOffset = -1;
  }


  /************************* building *********************************/

  /*
   * <li>Only public method used during the tokenization process
   * <li>Requires that the input ByteRange sort after the previous, and therefore after all previous
   * inputs
   * <li>Only looks at bytes of the input array that align with this node's token
   */
  public void addSorted(final ByteRange bytes) {// recursively build the tree

    /*
     * Recurse deeper into the existing trie structure
     */
    if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
      // because inputs are sorted, a continuation can only extend the last child
      TokenizerNode lastChild = CollectionUtils.getLast(children);
      if (lastChild.partiallyMatchesToken(bytes)) {
        lastChild.addSorted(bytes);
        return;
      }
    }

    /*
     * Recursion ended.  We must either
     * <li>1: increment numOccurrences if this input was equal to the previous
     * <li>2: convert this node from a leaf to a nub, and add a new child leaf
     * <li>3: split this node into a branch and leaf, and then add a second leaf
     */

    // add it as a child of this node
    int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length
    int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
    int tailLength = bytes.getLength() - tailOffset;

    if (numIdenticalTokenBytes == token.getLength()) {
      if (tailLength == 0) {// identical to this node (case 1)
        incrementNumOccurrences(1);
      } else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2)
        int childNodeDepth = nodeDepth + 1;
        int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
        TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
          bytes, tailOffset);
        addChild(newChildNode);
      }
    } else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3)
      split(numIdenticalTokenBytes, bytes);
    }
  }


  protected void addChild(TokenizerNode node) {
    node.setParent(this);
    children.add(node);
  }


  /**
   * Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the
   * method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output
   * will be 3 nodes:<br/>
   * <li>1: B &lt;- branch
   * <li>2: AA &lt;- leaf
   * <li>3: OO &lt;- leaf
   *
   * @param numTokenBytesToRetain =&gt; 1 (the B)
   * @param bytes =&gt; BOO
   */
  protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
    int childNodeDepth = nodeDepth;
    int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;

    //create leaf AA
    TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
      token, numTokenBytesToRetain);
    firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences
    token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B
    numOccurrences = 0;//current node is now a branch

    moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B)
    addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children

    //create leaf OO
    TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
      bytes, tokenStartOffset + numTokenBytesToRetain);
    addChild(secondChild);//add the new leaf (00) to the branch's (B's) children

    // we inserted branch node B as a new level above/before the two children, so increment the
    // depths of the children below
    firstChild.incrementNodeDepthRecursively();
    secondChild.incrementNodeDepthRecursively();
  }


  // bump this subtree's depth by one and keep the builder's max-depth tracker current
  protected void incrementNodeDepthRecursively() {
    ++nodeDepth;
    builder.submitMaxNodeDepthCandidate(nodeDepth);
    for (int i = 0; i < children.size(); ++i) {
      children.get(i).incrementNodeDepthRecursively();
    }
  }


  // re-parent all of this node's children under newParent, leaving this node childless
  protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.setParent(newParent);
      newParent.children.add(child);
    }
    children.clear();
  }


  /************************ byte[] utils *************************/

  // true if the input shares at least one byte with this node's token (at this node's depth)
  protected boolean partiallyMatchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) > 0;
  }

  // true if the input fully covers this node's token (at this node's depth)
  protected boolean matchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) == getTokenLength();
  }

  // number of leading token bytes shared with the input, starting at tokenStartOffset
  protected int numIdenticalBytes(ByteRange bytes) {
    return token.numEqualPrefixBytes(bytes, tokenStartOffset);
  }


  /***************** moving nodes around ************************/

  // depth-first pre-order traversal appending the selected node types to an external list
  public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
      boolean includeLeaves) {
    if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
      appendTo.add(this);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
    }
  }

  // assign each occurrence-bearing node its first insertion index; returns the next free index
  public int setInsertionIndexes(int nextIndex) {
    int newNextIndex = nextIndex;
    if (hasOccurrences()) {
      setFirstInsertionIndex(nextIndex);
      newNextIndex += numOccurrences;
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      newNextIndex = child.setInsertionIndexes(newNextIndex);
    }
    return newNextIndex;
  }

  // collect the output-array offsets of all occurrence-bearing nodes, depth-first
  public void appendOutputArrayOffsets(List<Integer> offsets) {
    if (hasOccurrences()) {
      offsets.add(outputArrayOffset);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendOutputArrayOffsets(offsets);
    }
  }


  /***************** searching *********************************/

  /*
   * Do a trie style search through the tokenizer.  One option for looking up families or qualifiers
   * during encoding, but currently unused in favor of tracking this information as they are added.
   *
   * Keeping code pending further performance testing.
   */
  public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
      int keyLength) {
    int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();

    // quick check if the key is shorter than this node (may not work for binary search)
    if (CollectionUtils.isEmpty(children)) {
      if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    // all token bytes must match
    for (int i = 0; i < token.getLength(); ++i) {
      if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
        // TODO return whether it's before or after so we can binary search
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
      resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH
      return;
    }

    if (CollectionUtils.notEmpty(children)) {
      // TODO binary search the children
      for (int i = 0; i < children.size(); ++i) {
        TokenizerNode child = children.get(i);
        child.getNode(resultHolder, key, keyOffset, keyLength);
        if (resultHolder.isMatch()) {
          return;
        } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
          // passed it, so it doesn't exist
          resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
          return;
        }
        // key is still AFTER the current node, so continue searching
      }
    }

    // checked all children (or there were no children), and didn't find it
    resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
    return;
  }


  /****************** writing back to byte[]'s *************************/

  // reconstruct this node's full input byte[] by walking up through its ancestors
  public byte[] getNewByteArray() {
    byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
    fillInBytes(arrayToFill);
    return arrayToFill;
  }

  // copy this node's token into arrayToFill at its offset, then recurse to the parent
  public void fillInBytes(byte[] arrayToFill) {
    for (int i = 0; i < token.getLength(); ++i) {
      arrayToFill[tokenStartOffset + i] = token.get(i);
    }
    if (parent != null) {
      parent.fillInBytes(arrayToFill);
    }
  }


  /************************** printing ***********************/

  @Override
  public String toString() {
    String s = "";
    if (parent == null) {
      s += "R ";
    } else {
      s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
    }
    s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
    if (numOccurrences > 0) {
      s += "x" + numOccurrences;
    }
    return s;
  }

  // one debug line: type indicator, occurrences, depth, optional output offset, indented token
  public String getPaddedTokenAndOccurrenceString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getBnlIndicator(true));
    sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
    sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
    if (outputArrayOffset >= 0) {
      sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
    }
    sb.append("  ");
    for (int i = 0; i < tokenStartOffset; ++i) {
      sb.append(" ");
    }
    sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
    return sb.toString();
  }

  // Branch/Nub/Leaf indicator, optionally padded for aligned debug output
  public String getBnlIndicator(boolean indent) {
    if (indent) {
      if (isNub()) {
        return " N ";
      }
      return isBranch() ? "B  " : "  L";
    }
    if (isNub()) {
      return "N";
    }
    return isBranch() ? "B" : "L";
  }


  /********************** count different node types ********************/

  public int getNumBranchNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isBranch() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  public int getNumNubNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isNub() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  public int getNumLeafNodesIncludingThisNode() {
    if (isLeaf()) {
      return 1;
    }
    int totalFromChildren = 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromChildren += child.getNumLeafNodesIncludingThisNode();
    }
    return totalFromChildren;
  }


  /*********************** simple read-only methods *******************************/

  public int getNodeDepth() {
    return nodeDepth;
  }

  public int getTokenLength() {
    return token.getLength();
  }

  public boolean hasOccurrences() {
    return numOccurrences > 0;
  }

  public boolean isRoot() {
    return this.parent == null;
  }

  public int getNumChildren() {
    return CollectionUtils.nullSafeSize(children);
  }

  public TokenizerNode getLastChild() {
    if (CollectionUtils.isEmpty(children)) {
      return null;
    }
    return CollectionUtils.getLast(children);
  }

  public boolean isLeaf() {
    return CollectionUtils.isEmpty(children) && hasOccurrences();
  }

  public boolean isBranch() {
    return CollectionUtils.notEmpty(children) && !hasOccurrences();
  }

  public boolean isNub() {
    return CollectionUtils.notEmpty(children) && hasOccurrences();
  }


  /********************** simple mutation methods *************************/

  /**
   * Each occurrence &gt; 1 indicates a repeat of the previous entry.  This can be called directly by
   * an external class without going through the process of detecting a repeat if it is a known
   * repeat by some external mechanism.  PtEncoder uses this when adding cells to a row if it knows
   * the new cells are part of the current row.
   * @param d increment by this amount
   */
  public void incrementNumOccurrences(int d) {
    numOccurrences += d;
  }


  /************************* autogenerated get/set ******************/

  public int getTokenOffset() {
    return tokenStartOffset;
  }

  public TokenizerNode getParent() {
    return parent;
  }

  public ByteRange getToken() {
    return token;
  }

  public int getNumOccurrences() {
    return numOccurrences;
  }

  public void setParent(TokenizerNode parent) {
    this.parent = parent;
  }

  public void setNumOccurrences(int numOccurrences) {
    this.numOccurrences = numOccurrences;
  }

  public ArrayList<TokenizerNode> getChildren() {
    return children;
  }

  public long getId() {
    return id;
  }

  public int getFirstInsertionIndex() {
    return firstInsertionIndex;
  }

  public void setFirstInsertionIndex(int firstInsertionIndex) {
    this.firstInsertionIndex = firstInsertionIndex;
  }

  public int getNegativeIndex() {
    return negativeIndex;
  }

  public void setNegativeIndex(int negativeIndex) {
    this.negativeIndex = negativeIndex;
  }

  public int getOutputArrayOffset() {
    return outputArrayOffset;
  }

  public void setOutputArrayOffset(int outputArrayOffset) {
    this.outputArrayOffset = outputArrayOffset;
  }

  public void setId(long id) {
    this.id = id;
  }

  public void setBuilder(Tokenizer builder) {
    this.builder = builder;
  }

  public void setTokenOffset(int tokenOffset) {
    this.tokenStartOffset = tokenOffset;
  }

  public void setToken(ByteRange token) {
    this.token = token;
  }
}

View File

@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.tokenize;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * Warning: currently unused, but code is valid. Pending performance testing on more data sets.
 *
 * Where is the key relative to our current position in the tree. For example, the current tree node
 * is "BEFORE" the key we are seeking
 */
@InterfaceAudience.Private
public enum TokenizerRowSearchPosition {

  /** the key sorts after this tree node, so keep searching */
  AFTER,

  /** in a binary search, this tells us to back up */
  BEFORE,

  /** the current node is a full match */
  MATCH,

  /** might as well return a value more informative than null */
  NO_MATCH;

}

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.encode.tokenize;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * Mutable result holder for recursively searching a PtBuilder.  Records where the probed key sits
 * relative to the tree (the "difference") and, on an exact hit, which node matched.
 */
@InterfaceAudience.Private
public class TokenizerRowSearchResult{

  /************ fields ************************/

  protected TokenizerRowSearchPosition difference;
  protected TokenizerNode matchingNode;

  /*************** construct *****************/

  public TokenizerRowSearchResult() {
  }

  public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) {
    this.difference = difference;
  }

  public TokenizerRowSearchResult(TokenizerNode matchingNode) {
    this.matchingNode = matchingNode;
    this.difference = TokenizerRowSearchPosition.MATCH;// a node implies an exact hit
  }

  /*************** methods **********************/

  /** @return true when the last search ended on an exact match. */
  public boolean isMatch() {
    return difference == TokenizerRowSearchPosition.MATCH;
  }

  /************* get/set ***************************/

  public TokenizerRowSearchPosition getDifference() {
    return difference;
  }

  public TokenizerNode getMatchingNode() {
    return matchingNode;
  }

  /** Overwrite both fields at once so a single instance can be recycled between searches. */
  public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) {
    this.difference = difference;
    this.matchingNode = matchingNode;
  }

}

View File

@ -0,0 +1,71 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.scanner;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.Cell;
/**
 * Alternate name may be CellInputStream
 * <p/>
 * An interface for iterating through a sequence of cells. Similar to Java's Iterator, but without
 * the hasNext() or remove() methods. The hasNext() method is problematic because it may require
 * actually loading the next object, which in turn requires storing the previous object somewhere.
 * The core data block decoder should be as fast as possible, so we push the complexity and
 * performance expense of concurrently tracking multiple cells to layers above the CellScanner.
 * <p/>
 * The getCurrent() method will return a reference to a Cell implementation. This reference may
 * or may not point to a reusable cell implementation, so users of the CellScanner should not, for
 * example, accumulate a List of Cells. All of the references may point to the same object, which
 * would be the latest state of the underlying Cell. In short, the Cell is mutable.
 * <p/>
 * At a minimum, an implementation will need to be able to advance from one cell to the next in a
 * LinkedList fashion via next(). A DataBlockEncoding that supports random access into the block
 * may layer smarter navigation methods on top of this interface (see CellSearcher).
 * <p/>
 * Typical usage:
 *
 * <pre>
 * while (scanner.next()) {
 *   Cell cell = scanner.getCurrent();
 *   // do something
 * }
 * </pre>
 */
@InterfaceAudience.Private
public interface CellScanner{

  /**
   * Reset any state in the scanner so it appears it was freshly opened.
   */
  void resetToBeforeFirstEntry();

  /**
   * @return the current Cell, which may be a mutable, reused instance; callers should not retain
   *         references to it across calls to next()
   */
  Cell getCurrent();

  /**
   * Advance the scanner 1 cell.
   * @return true if the next cell is found and getCurrent() will return a valid Cell
   */
  boolean next();

}

View File

@ -0,0 +1,107 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.scanner;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellScannerPosition;
/**
 * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that
 * the implementation is able to navigate between cells without iterating through every cell.
 */
@InterfaceAudience.Private
public interface CellSearcher extends ReversibleCellScanner {

  /**
   * Do everything within this scanner's power to find the key. Look forward and backwards.
   * <p/>
   * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state.
   * <p/>
   * @param key position the CellScanner exactly on this key
   * @return true if the cell existed and getCurrent() holds a valid cell
   */
  boolean positionAt(Cell key);

  /**
   * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no
   * exact match.
   * <p/>
   * @param key position the CellScanner on this key or the closest cell before
   * @return AT if exact match<br/>
   *         BEFORE if on last cell before key<br/>
   *         BEFORE_FIRST if key was before the first cell in this scanner's scope
   */
  CellScannerPosition positionAtOrBefore(Cell key);

  /**
   * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact
   * match.
   * <p/>
   * @param key position the CellScanner on this key or the closest cell after
   * @return AT if exact match<br/>
   *         AFTER if on first cell after key<br/>
   *         AFTER_LAST if key was after the last cell in this scanner's scope
   */
  CellScannerPosition positionAtOrAfter(Cell key);

  /**
   * Note: Added for backwards compatibility with
   * {@link org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek}
   * <p/>
   * Look for the key, but only look after the current position. Probably not needed for an
   * efficient tree implementation, but is important for implementations without random access such
   * as unencoded KeyValue blocks.
   * <p/>
   * @param key position the CellScanner exactly on this key
   * @return true if getCurrent() holds a valid cell
   */
  boolean seekForwardTo(Cell key);

  /**
   * Same as seekForwardTo(..), but go to the extra effort of finding the previous key if there's
   * no exact match.
   * <p/>
   * NOTE(review): the original javadoc here appeared copy-pasted from seekForwardToOrAfter
   * (it described "next key" / AFTER semantics).  The return values below follow the "OrBefore"
   * name — confirm against the implementations.
   * <p/>
   * @param key position the CellScanner on this key or the closest cell before, looking only
   *          forward from the current position
   * @return AT if exact match<br/>
   *         BEFORE if on last cell before key<br/>
   *         AFTER_LAST if key was after the last cell in this scanner's scope
   */
  CellScannerPosition seekForwardToOrBefore(Cell key);

  /**
   * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no
   * exact match.
   * <p/>
   * @param key position the CellScanner on this key or the closest cell after, looking only
   *          forward from the current position
   * @return AT if exact match<br/>
   *         AFTER if on first cell after key<br/>
   *         AFTER_LAST if key was after the last cell in this scanner's scope
   */
  CellScannerPosition seekForwardToOrAfter(Cell key);

  /**
   * Note: This may not be appropriate to have in the interface. Need to investigate.
   * <p/>
   * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST.
   * This is used by tests and for handling certain edge cases.
   */
  void positionAfterLastCell();

}

View File

@ -0,0 +1,52 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.scanner;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * An extension of CellScanner indicating the scanner supports iterating backwards through cells.
 * <p>
 * Note: This was not added to suggest that HBase should support client facing reverse Scanners, but
 * because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing up
 * if the positionAt(..) method goes past the requested cell.
 */
@InterfaceAudience.Private
public interface ReversibleCellScanner extends CellScanner {

  /**
   * Try to position the scanner one Cell before the current position.
   * @return true if the operation was successful, meaning getCurrent() will return a valid
   *         Cell.<br/>
   *         false if there were no previous cells, meaning getCurrent() will return null.
   *         Scanner position will be {@link org.apache.hbase.cell.CellScannerPosition#BEFORE_FIRST}
   */
  boolean previous();

  /**
   * Try to position the scanner in the row before the current row.
   * @param endOfRow true to position on the last cell in the previous row; false for the first cell
   * @return true if the operation was successful, meaning getCurrent() will return a valid
   *         Cell.<br/>
   *         false if there were no previous cells, meaning getCurrent() will return null.
   *         Scanner position will be {@link org.apache.hbase.cell.CellScannerPosition#BEFORE_FIRST}
   */
  boolean previousRow(boolean endOfRow);

}

View File

@ -0,0 +1,180 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.byterange;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import com.google.common.collect.Lists;
/**
 * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
 * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
 * <p>
 * Current implementations are {@link org.apache.hbase.util.byterange.impl.ByteRangeHashSet} and
 * {@link org.apache.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
 * trie-oriented ByteRangeTrieSet, etc
 */
@InterfaceAudience.Private
public abstract class ByteRangeSet {

  /******************** fields **********************/

  // single backing buffer that every stored range points into; reallocated as it grows
  protected byte[] byteAppender;
  // number of bytes of byteAppender currently in use
  protected int numBytes;

  // maps each de-duped range to its "unique index" (arrival order among unique ranges);
  // the concrete Map (Hash vs Tree) is supplied by the subclass constructor
  protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;
  // reusable ByteRange flyweights, one per unique range; retained across reset() calls
  protected ArrayList<ByteRange> uniqueRanges;
  protected int numUniqueRanges = 0;

  // for input i (0-based arrival order, duplicates included), the unique index it mapped to
  protected int[] uniqueRangeIndexByInsertionId;
  protected int numInputs;

  // built by compile(): for each unique index, its position in the sorted order
  protected List<Integer> sortedIndexByUniqueIndex;
  // built by compile(): for each original input, its position in the sorted order
  protected int[] sortedIndexByInsertionId;
  // built by addToSortedRanges(): the unique ranges in sorted order
  protected ArrayList<ByteRange> sortedRanges;

  /****************** construct **********************/

  protected ByteRangeSet() {
    this.byteAppender = new byte[0];
    this.uniqueRanges = Lists.newArrayList();
    this.uniqueRangeIndexByInsertionId = new int[0];
    this.sortedIndexByUniqueIndex = Lists.newArrayList();
    this.sortedIndexByInsertionId = new int[0];
    this.sortedRanges = Lists.newArrayList();
  }

  /**
   * Prepare this object for reuse.  Counters are zeroed and lookup structures cleared, but the
   * backing arrays and ByteRange flyweights keep their capacity, minimizing garbage on reuse.
   */
  public void reset() {
    numBytes = 0;
    uniqueIndexByUniqueRange.clear();
    numUniqueRanges = 0;
    numInputs = 0;
    sortedIndexByUniqueIndex.clear();
    sortedRanges.clear();
  }

  /*************** abstract *************************/

  /** Populate sortedRanges from uniqueIndexByUniqueRange's keys, sorted; subclass-specific. */
  public abstract void addToSortedRanges();

  /**************** methods *************************/

  /**
   * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and
   * insert it into the tracking Map uniqueIndexByUniqueRange.  Either way, record the unique
   * index this insertion resolved to.
   */
  public void add(ByteRange bytes) {
    Integer index = uniqueIndexByUniqueRange.get(bytes);
    if (index == null) {
      index = store(bytes);
    }
    int minLength = numInputs + 1;
    // grow with headroom (2x) so repeated adds are amortized O(1)
    uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
      minLength, 2 * minLength);
    uniqueRangeIndexByInsertionId[numInputs] = index;
    ++numInputs;
  }

  /**
   * Copy the incoming bytes onto the end of byteAppender and point a (possibly recycled)
   * ByteRange flyweight at the copy.
   * @return the unique index assigned to the newly stored range
   */
  protected int store(ByteRange bytes) {
    int indexOfNewElement = numUniqueRanges;
    if (uniqueRanges.size() <= numUniqueRanges) {// recycle flyweights left over from a prior run
      uniqueRanges.add(new ByteRange());
    }
    ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
    int neededBytes = numBytes + bytes.getLength();
    // NOTE(review): growing may reallocate byteAppender, so ranges stored earlier still point at
    // the previous array; presumably safe since the old copies retain their bytes — confirm.
    byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
    bytes.deepCopyTo(byteAppender, numBytes);
    storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
    numBytes += bytes.getLength();
    uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
    int newestUniqueIndex = numUniqueRanges;
    ++numUniqueRanges;
    return newestUniqueIndex;
  }

  /**
   * Finalize the set: sort the unique ranges, then build the unique-index -> sorted-index and
   * insertion-id -> sorted-index mappings used by readers.
   * @return this, for call chaining
   */
  public ByteRangeSet compile() {
    addToSortedRanges();
    for (int i = 0; i < sortedRanges.size(); ++i) {
      sortedIndexByUniqueIndex.add(null);// need to grow the size
    }
    // TODO move this to an invert(int[]) util method
    for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
      int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
      sortedIndexByUniqueIndex.set(uniqueIndex, i);
    }
    sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
      numInputs);
    for (int i = 0; i < numInputs; ++i) {
      int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
      int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
      sortedIndexByInsertionId[i] = sortedIndex;
    }
    return this;
  }

  /**
   * @param insertionId 0-based arrival order of an input passed to add(..)
   * @return that input's position in the sorted unique ranges; only valid after compile()
   */
  public int getSortedIndexForInsertionId(int insertionId) {
    return sortedIndexByInsertionId[insertionId];
  }

  /** @return the number of unique ranges stored (duplicates counted once) */
  public int size() {
    return uniqueIndexByUniqueRange.size();
  }

  /***************** standard methods ************************/

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    int i = 0;
    for (ByteRange r : sortedRanges) {
      if (i > 0) {
        sb.append("\n");
      }
      sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
      ++i;
    }
    sb.append("\ntotalSize:" + numBytes);
    sb.append("\navgSize:" + getAvgSize());
    return sb.toString();
  }

  /**************** get/set *****************************/

  public ArrayList<ByteRange> getSortedRanges() {
    return sortedRanges;
  }

  /** @return average stored-range length in bytes; throws ArithmeticException on an empty set */
  public long getAvgSize() {
    return numBytes / numUniqueRanges;
  }

}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.byterange.impl;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hbase.util.byterange.ByteRangeSet;
/**
 * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap produces
 * garbage when adding a new element to it. We can probably create a tighter implementation without
 * pointers or garbage.
 */
@InterfaceAudience.Private
public class ByteRangeHashSet extends ByteRangeSet {

  /************************ constructors *****************************/

  /** Creates an empty set backed by a HashMap for O(1) de-duping lookups. */
  public ByteRangeHashSet() {
    this.uniqueIndexByUniqueRange = new HashMap<ByteRange, Integer>();
  }

  /**
   * Creates a set pre-populated with the given ranges.
   * @param rawByteArrays ranges to add; a null collection is treated as empty
   */
  public ByteRangeHashSet(List<ByteRange> rawByteArrays) {
    this();// bug fix: the backing HashMap was never initialized, so every add(..) below NPE'd
    for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
      add(in);
    }
  }

  @Override
  public void addToSortedRanges() {
    sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
    Collections.sort(sortedRanges);// HashMap keys arrive unordered, so sort explicitly
  }

}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.byterange.impl;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hbase.util.byterange.ByteRangeSet;
/**
 * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet.
 */
@InterfaceAudience.Private
public class ByteRangeTreeSet extends ByteRangeSet {

  /************************ constructors *****************************/

  /** Creates an empty set backed by a TreeMap, which keeps the unique ranges sorted on insert. */
  public ByteRangeTreeSet() {
    this.uniqueIndexByUniqueRange = new TreeMap<ByteRange,Integer>();
  }

  /** Creates a set pre-populated with the given ranges (a null collection is treated as empty). */
  public ByteRangeTreeSet(List<ByteRange> rawByteArrays) {
    this();// the TreeMap must exist before any add(..) call
    for (ByteRange range : IterableUtils.nullSafe(rawByteArrays)) {
      add(range);
    }
  }

  @Override
  public void addToSortedRanges() {
    // TreeMap key iteration is already in sorted order, so unlike the
    // hash-backed variant no explicit sort is required here
    sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
  }

}

View File

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.vint;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * UFInt is an abbreviation for Unsigned Fixed-width Integer.
 *
 * Converts between non-negative values and a fixed-width, big-endian byte encoding.  Maximum
 * values storable in N bytes are:
 *
 * N=1: 2^8  - 1 =>           255
 * N=2: 2^16 - 1 =>        65,535
 * N=3: 2^24 - 1 =>    16,777,215
 * N=4: 2^31 - 1 => 2,147,483,647 (Integer.MAX_VALUE, when the decoded value is used as an int)
 *
 * This was created to get most of the memory savings of a variable length integer when encoding
 * an array of input integers, but to fix the number of bytes for each integer to the number needed
 * to store the maximum integer in the array. This enables a binary search to be performed on the
 * array of encoded integers.
 *
 * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if
 * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
 * numbers will also require 2 bytes.
 *
 * warnings:
 * * no input validation for max performance
 * * no negatives
 */
@InterfaceAudience.Private
public class UFIntTool {

  private static final int NUM_BITS_IN_LONG = 64;

  /** @return the largest value representable in numBytes bytes, e.g. 255 for 1 byte */
  public static long maxValueForNumBytes(int numBytes) {
    return (1L << (numBytes * 8)) - 1;
  }

  /** @return the minimum number of bytes (at least 1) needed to encode value */
  public static int numBytes(final long value) {
    if (value == 0) {// the bit-length arithmetic below would yield 0 for an input of 0
      return 1;
    }
    int bitLength = NUM_BITS_IN_LONG - Long.numberOfLeadingZeros(value);
    return (bitLength + 7) / 8;// round up to whole bytes
  }

  /** Encode value big-endian into a freshly allocated array of outputWidth bytes. */
  public static byte[] getBytes(int outputWidth, final long value) {
    byte[] encoded = new byte[outputWidth];
    writeBytes(outputWidth, value, encoded, 0);
    return encoded;
  }

  /** Encode value big-endian into bytes[offset .. offset + outputWidth - 1]. */
  public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
    long remaining = value;
    for (int i = outputWidth - 1; i >= 0; --i) {// fill from the least-significant byte backwards
      bytes[offset + i] = (byte) remaining;
      remaining >>>= 8;
    }
  }

  /** Stream version of writeBytes(..): emits the outputWidth big-endian bytes of value. */
  public static void writeBytes(int outputWidth, final long value, OutputStream os)
      throws IOException {
    for (int i = outputWidth - 1; i >= 0; --i) {
      os.write((int) (value >>> (8 * i)) & 0xff);// OutputStream.write keeps only the low byte
    }
  }

  /** Decode an entire big-endian array back into a long. */
  public static long fromBytes(final byte[] bytes) {
    return fromBytes(bytes, 0, bytes.length);
  }

  /** Decode width big-endian bytes starting at bytes[offset]. */
  public static long fromBytes(final byte[] bytes, final int offset, final int width) {
    long value = bytes[offset] & 0xff;// mask to keep the byte from sign-extending
    for (int i = 1; i < width; ++i) {
      value = (value << 8) | (bytes[offset + i] & 0xff);
    }
    return value;
  }

}

View File

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.vint;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * Simple Variable Length Integer encoding.  Left bit of 0 means we are on the last byte.  If left
 * bit of the current byte is 1, then there is at least one more byte.
 */
@InterfaceAudience.Private
public class UVIntTool {

  public static final byte
    BYTE_7_RIGHT_BITS_SET = 127,
    BYTE_LEFT_BIT_SET = -128;

  public static final long
    INT_7_RIGHT_BITS_SET = 127,
    INT_8TH_BIT_SET = 128;

  /** Encoding of Integer.MAX_VALUE: four continuation bytes, then the final 3 bits. */
  public static final byte[]
    MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 };

  /********************* int -> bytes **************************/

  /** @return the number of bytes (1-5) needed to encode the non-negative value in */
  public static int numBytes(int in) {
    if (in == 0) {
      // doesn't work with the formula below
      return 1;
    }
    return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1)
  }

  /** Encode value into a freshly allocated byte[], least-significant 7-bit group first. */
  public static byte[] getBytes(int value) {
    int numBytes = numBytes(value);
    byte[] bytes = new byte[numBytes];
    int remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET);
      remainder >>= 7;
    }
    // do not set the left bit
    bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET);
    return bytes;
  }

  /**
   * Stream version of getBytes(..).
   * @return the number of bytes written
   */
  public static int writeBytes(int value, OutputStream os) throws IOException {
    int numBytes = numBytes(value);
    int remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET));
      remainder >>= 7;
    }
    // do not set the left bit
    os.write((byte) (remainder & INT_7_RIGHT_BITS_SET));
    return numBytes;
  }

  /******************** bytes -> int **************************/

  /** Decode a varint that starts at bytes[0]. */
  public static int getInt(byte[] bytes) {
    return getInt(bytes, 0);
  }

  /** Decode a varint that starts at bytes[offset]. */
  public static int getInt(byte[] bytes, int offset) {
    int value = 0;
    for (int i = 0;; ++i) {
      byte b = bytes[offset + i];
      int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      if (b >= 0) {// high bit clear -> this was the final byte
        break;
      }
    }
    return value;
  }

  /**
   * Decode a varint from the stream, consuming exactly the varint's bytes.
   * @throws IOException if the stream ends before the varint's final byte
   */
  public static int getInt(InputStream is) throws IOException {
    int value = 0;
    int i = 0;
    int b;
    do {
      b = is.read();
      if (b < 0) {
        // bug fix: EOF (-1) was previously folded into the result as if it were a data byte,
        // silently returning a corrupt value when the stream was truncated or empty
        throw new IOException("Stream ended in the middle of a UVInt");
      }
      int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      ++i;
    } while (b > Byte.MAX_VALUE);// high bit set -> more bytes follow
    return value;
  }

}

View File

@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.vint;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * Simple Variable Length Integer encoding.  Left bit of 0 means we are on the last byte.  If left
 * bit of the current byte is 1, then there is at least one more byte.
 */
@InterfaceAudience.Private
public class UVLongTool{

  public static final byte
    BYTE_7_RIGHT_BITS_SET = 127,
    BYTE_LEFT_BIT_SET = -128;

  public static final long
    LONG_7_RIGHT_BITS_SET = 127,
    LONG_8TH_BIT_SET = 128;

  /** Encoding of Long.MAX_VALUE: eight continuation bytes, then the final 7 bits. */
  public static final byte[]
    MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 };

  /********************* long -> bytes **************************/

  /** @return the number of bytes (1-9) needed to encode the non-negative value in */
  public static int numBytes(long in) {// do a check for illegal arguments if not protected
    if (in == 0) {
      return 1;
    }// doesn't work with the formula below
    return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1)
  }

  /** Encode value into a freshly allocated byte[], least-significant 7-bit group first. */
  public static byte[] getBytes(long value) {
    int numBytes = numBytes(value);
    byte[] bytes = new byte[numBytes];
    long remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit
      remainder >>= 7;
    }
    bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit
    return bytes;
  }

  /**
   * Stream version of getBytes(..).
   * @return the number of bytes written
   */
  public static int writeBytes(long value, OutputStream os) throws IOException {
    int numBytes = numBytes(value);
    long remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET));
      remainder >>= 7;
    }
    // do not set the left bit
    os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET));
    return numBytes;
  }

  /******************** bytes -> long **************************/

  /** Decode a varlong that starts at bytes[0]. */
  public static long getLong(byte[] bytes) {
    return getLong(bytes, 0);
  }

  /** Decode a varlong that starts at bytes[offset]. */
  public static long getLong(byte[] bytes, int offset) {
    long value = 0;
    for (int i = 0;; ++i) {
      byte b = bytes[offset + i];
      long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      if (b >= 0) {
        break;
      }// first bit was 0, so that's the last byte in the VarLong
    }
    return value;
  }

  /**
   * Decode a varlong from the stream, consuming exactly the varlong's bytes.
   * @throws IOException if the stream ends before the varlong's final byte
   */
  public static long getLong(InputStream is) throws IOException {
    long value = 0;
    int i = 0;
    int b;
    do {
      b = is.read();
      if (b < 0) {
        // bug fix: EOF (-1) was previously folded into the result as if it were a data byte,
        // silently returning a corrupt value when the stream was truncated or empty
        throw new IOException("Stream ended in the middle of a UVLong");
      }
      long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      ++i;
    } while (b > Byte.MAX_VALUE);// high bit set -> more bytes follow
    return value;
  }

}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.keyvalue;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hbase.codec.prefixtree.row.TestRowData;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
public class TestKeyValueTool {

  /** One test invocation per sample data set provided by TestRowData. */
  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestRowData.InMemory().getAllAsObjectArray();
  }

  private TestRowData rows;

  public TestKeyValueTool(TestRowData testRows) {
    this.rows = testRows;
  }

  /** Serialize the sample KeyValues to a buffer, read them back, and require equality. */
  @Test
  public void testRoundTripToBytes() {
    List<KeyValue> originals = rows.getInputs();
    ByteBuffer buffer = KeyValueTestUtil.toByteBufferAndRewind(originals, false);
    List<KeyValue> reparsed = KeyValueTestUtil.rewindThenToList(buffer, false);
    Assert.assertArrayEquals(originals.toArray(), reparsed.toArray());
  }

}

View File

@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Shared constants for prefix-tree codec tests.
 */
public class PrefixTreeTestConstants {

  /** Default column family used by the test fixtures. */
  public static final byte[] TEST_CF = Bytes.toBytes("cfDefault");

  /** Constants holder; not meant to be instantiated. */
  private PrefixTreeTestConstants() {
  }
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.blockmeta;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.junit.Assert;
import org.junit.Test;
public class TestBlockMeta {

  /** Arbitrary block offset; final so it cannot be mutated between tests. */
  static final int BLOCK_START = 123;

  /**
   * Builds a PrefixTreeBlockMeta with every field set to a distinct,
   * non-default value so a serialization bug in any single field shows up
   * in the round-trip comparison.
   */
  private static PrefixTreeBlockMeta createSample() {
    PrefixTreeBlockMeta m = new PrefixTreeBlockMeta();
    m.setNumMetaBytes(0);
    m.setNumKeyValueBytes(3195);
    m.setNumRowBytes(0);
    m.setNumFamilyBytes(3);
    m.setNumQualifierBytes(12345);
    m.setNumTimestampBytes(23456);
    m.setNumMvccVersionBytes(5);
    m.setNumValueBytes(34567);
    m.setNextNodeOffsetWidth(3);
    m.setFamilyOffsetWidth(1);
    m.setQualifierOffsetWidth(2);
    m.setTimestampIndexWidth(1);
    m.setMvccVersionIndexWidth(2);
    m.setValueOffsetWidth(8);
    m.setValueLengthWidth(3);
    m.setRowTreeDepth(11);
    m.setMaxRowLength(200);
    m.setMaxQualifierLength(50);
    m.setMinTimestamp(1318966363481L);
    m.setTimestampDeltaWidth(3);
    m.setMinMvccVersion(100L);
    m.setMvccVersionDeltaWidth(4);
    m.setAllSameType(false);
    m.setAllTypes(KeyValue.Type.Delete.getCode());
    m.setNumUniqueRows(88);
    m.setNumUniqueFamilies(1);
    m.setNumUniqueQualifiers(56);
    return m;
  }

  /**
   * Writes the meta to a stream, re-parses it from the resulting bytes, and
   * verifies equality. assertEquals (rather than assertTrue(equals)) so a
   * failure reports both objects.
   */
  @Test
  public void testStreamSerialization() throws IOException {
    PrefixTreeBlockMeta original = createSample();
    ByteArrayOutputStream os = new ByteArrayOutputStream(10000);
    original.writeVariableBytesToOutputStream(os);
    ByteBuffer buffer = ByteBuffer.wrap(os.toByteArray());
    PrefixTreeBlockMeta roundTripped = new PrefixTreeBlockMeta(buffer);
    Assert.assertEquals(original, roundTripped);
  }
}

View File

@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.builder;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
public class TestTokenizer {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestTokenizerData.InMemory().getAllAsObjectArray();
  }

  /** Sorted byte arrays fed into the tokenizer. */
  private final List<byte[]> inputs;
  /** Tokenizer under test, pre-loaded with all inputs in the constructor. */
  private final Tokenizer builder;
  /** Arrays reconstructed from the tokenizer's trie. */
  private final List<byte[]> roundTripped;

  public TestTokenizer(TestTokenizerData sortedByteArrays) {
    inputs = sortedByteArrays.getInputs();
    builder = new Tokenizer();
    for (int i = 0; i < inputs.size(); ++i) {
      builder.addSorted(new ByteRange(inputs.get(i)));
    }
    roundTripped = builder.getArrays();
  }

  /** The trie must reproduce every input exactly and keep sorted order. */
  @Test
  public void testReaderRoundTrip() {
    Assert.assertEquals(inputs.size(), roundTripped.size());
    Assert.assertTrue(Bytes.isSorted(roundTripped));
    Assert.assertTrue(Bytes.equals(inputs, roundTripped));
  }

  /** Every input must be findable again via a trie search. */
  @Test
  public void testSearching() {
    for (byte[] input : inputs) {
      TokenizerRowSearchResult result = new TokenizerRowSearchResult();
      builder.getNode(result, input, 0, input.length);
      TokenizerNode match = result.getMatchingNode();
      byte[] found = match.getNewByteArray();
      Assert.assertTrue(Bytes.equals(input, found));
    }
  }
}

View File

@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.builder;
import java.util.Collection;
import java.util.List;
import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataBasic;
import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataEdgeCase;
import com.google.common.collect.Lists;
/**
 * Fixture contract for tokenizer tests: sorted input byte arrays plus the
 * arrays expected back out of the tokenizer.
 */
public interface TestTokenizerData {

  List<byte[]> getInputs();

  List<byte[]> getOutputs();

  /** Registry of all fixture implementations, for parameterized runners. */
  public static class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      return Lists.<Object[]>newArrayList(
          new Object[] { new TestTokenizerDataBasic() },
          new Object[] { new TestTokenizerDataEdgeCase() });
    }
  }
}

View File

@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.builder;
import java.util.List;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.junit.Assert;
import org.junit.Test;
import org.mortbay.log.Log;
import com.google.common.collect.Lists;
public class TestTreeDepth {

  @Test
  public void testSingleNode() {
    testInternal(Lists.newArrayList("a"), 1);
  }

  @Test
  public void testSimpleBranch() {
    testInternal(Lists.newArrayList("a", "aa", "ab"), 2);
  }

  @Test
  public void testEmptyRoot() {
    testInternal(Lists.newArrayList("a", "b"), 2);
  }

  @Test
  public void testRootAsNub() {
    testInternal(Lists.newArrayList("a", "aa"), 2);
  }

  @Test
  public void testRootAsNubPlusNub() {
    testInternal(Lists.newArrayList("a", "aa", "aaa"), 3);
  }

  @Test
  public void testEmptyRootPlusNub() {
    testInternal(Lists.newArrayList("a", "aa", "b"), 3);
  }

  @Test
  public void testSplitDistantAncestor() {
    testInternal(Lists.newArrayList("a", "ac", "acd", "b"), 4);
  }

  /**
   * Feeds the sorted strings into a Tokenizer and checks the resulting trie
   * depth. The root is always expected at node depth 1.
   */
  protected void testInternal(List<String> inputs, int expectedTreeDepth) {
    Log.warn("init logger");
    Tokenizer tokenizer = new Tokenizer();
    for (String input : inputs) {
      tokenizer.addSorted(new ByteRange(Bytes.toBytes(input)));
    }
    Assert.assertEquals(1, tokenizer.getRoot().getNodeDepth());
    Assert.assertEquals(expectedTreeDepth, tokenizer.getTreeDepth());
  }
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.builder.data;
import java.util.List;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData;
import com.google.common.collect.Lists;
public class TestTokenizerDataBasic implements TestTokenizerData {

  /** Shared fixture data; the same list serves as input and expected output. */
  static List<byte[]> d;
  static {
    List<String> strings = Lists.newArrayList(
        "abc",   // nub
        "abcde", // leaf
        "bbc",   // causes root to split and have empty token
        "bbc",   // makes numOccurrences=2 on the bbc node
        "cd");   // just to get another node after the numOccurrences=2
    d = Bytes.getUtf8ByteArrays(strings);
  }

  @Override
  public List<byte[]> getInputs() {
    return d;
  }

  @Override
  public List<byte[]> getOutputs() {
    return d;
  }
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.builder.data;
import java.util.List;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData;
import com.google.common.collect.Lists;
public class TestTokenizerDataEdgeCase implements TestTokenizerData {

  /** Shared fixture data; the same list serves as input and expected output. */
  static List<byte[]> d;
  static {
    /*
     * tricky little combination because the acegi token will partially match
     * abdfi, but when you descend into abdfi, it will not fully match
     */
    List<String> strings = Lists.newArrayList("abdfh", "abdfi", "acegi");
    d = Bytes.getUtf8ByteArrays(strings);
  }

  @Override
  public List<byte[]> getInputs() {
    return d;
  }

  @Override
  public List<byte[]> getOutputs() {
    return d;
  }
}

View File

@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.column;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import com.google.common.collect.Lists;
@RunWith(Parameterized.class)
public class TestColumnBuilder {

  /** One parameter set per registered TestColumnData fixture. */
  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestColumnData.InMemory().getAllAsObjectArray();
  }

  /*********** fields **********************************/

  protected TestColumnData columns;
  // De-duplicating, sorting container for the raw input qualifiers.
  protected ByteRangeTreeSet columnSorter;
  protected List<ByteRange> sortedUniqueColumns;
  protected PrefixTreeBlockMeta blockMeta;
  protected Tokenizer builder;
  // The following are populated inside testReaderRoundTrip, in order:
  // writer -> bytes -> buffer -> reader.
  protected ColumnSectionWriter writer;
  protected byte[] bytes;
  protected byte[] buffer;
  protected ColumnReader reader;

  /*************** construct ****************************/

  // Sorts and de-dupes the fixture's qualifiers, sanity-checks the sort,
  // and prepares an empty block meta and tokenizer for the test method.
  public TestColumnBuilder(TestColumnData columns) {
    this.columns = columns;
    List<ByteRange> inputs = columns.getInputs();
    this.columnSorter = new ByteRangeTreeSet(inputs);
    this.sortedUniqueColumns = columnSorter.compile().getSortedRanges();
    List<byte[]> copies = ByteRangeTool.copyToNewArrays(sortedUniqueColumns);
    Assert.assertTrue(Bytes.isSorted(copies));
    this.blockMeta = new PrefixTreeBlockMeta();
    this.blockMeta.setNumMetaBytes(0);
    this.blockMeta.setNumRowBytes(0);
    this.builder = new Tokenizer();
  }

  /************* methods ********************************/

  /**
   * Feeds the sorted unique qualifiers through the tokenizer, serializes the
   * column section, then reads every qualifier back via ColumnReader and
   * checks each one matches the corresponding input.
   */
  @Test
  public void testReaderRoundTrip() throws IOException {
    for (int i = 0; i < sortedUniqueColumns.size(); ++i) {
      ByteRange column = sortedUniqueColumns.get(i);
      builder.addSorted(column);
    }
    // First verify the tokenizer alone round-trips the arrays.
    List<byte[]> builderOutputArrays = builder.getArrays();
    for (int i = 0; i < builderOutputArrays.size(); ++i) {
      byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray();
      byte[] outputArray = builderOutputArrays.get(i);
      boolean same = Bytes.equals(inputArray, outputArray);
      Assert.assertTrue(same);
    }
    Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size());

    // Serialize the column section to bytes.
    writer = new ColumnSectionWriter(blockMeta, builder, false);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    writer.compile().writeBytes(baos);
    bytes = baos.toByteArray();

    // Read each qualifier back by its output-array offset and compare.
    buffer = new byte[blockMeta.getMaxQualifierLength()];
    reader = new ColumnReader(buffer, false);
    reader.initOnBlock(blockMeta, bytes);

    List<TokenizerNode> builderNodes = Lists.newArrayList();
    builder.appendNodes(builderNodes, true, true);
    int i = 0;
    for (TokenizerNode builderNode : builderNodes) {
      if (!builderNode.hasOccurrences()) {
        continue;
      }
      Assert.assertEquals(1, builderNode.getNumOccurrences());// we de-duped before adding to
                                                              // builder
      int position = builderNode.getOutputArrayOffset();
      byte[] output = reader.populateBuffer(position).copyBufferToNewArray();
      boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output);
      Assert.assertTrue(same);
      ++i;
    }
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.column;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataRandom;
import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataSimple;
import com.google.common.collect.Lists;
/**
 * Fixture contract for column-section tests: raw input qualifiers plus the
 * sorted unique qualifiers expected back.
 */
public interface TestColumnData {

  List<ByteRange> getInputs();

  List<ByteRange> getOutputs();

  /** Registry of all fixture implementations, for parameterized runners. */
  public static class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      all.add(new Object[] { new TestColumnDataSimple() });
      // Random fixtures at power-of-two sizes: 1, 2, 4, ... 32768.
      for (int numColumns = 1; numColumns <= 1 << 15; numColumns <<= 1) {
        all.add(new Object[] { new TestColumnDataRandom(numColumns) });
      }
      return all;
    }
  }
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.column.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.apache.hbase.codec.prefixtree.column.TestColumnData;
import org.apache.hbase.util.byterange.ByteRangeSet;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
import com.google.common.collect.Lists;
public class TestColumnDataRandom implements TestColumnData {

  /** Qualifiers in generation order, possibly with duplicates. */
  private final List<ByteRange> inputs = Lists.newArrayList();
  /** The same qualifiers, de-duplicated and sorted. */
  private final List<ByteRange> outputs = Lists.newArrayList();

  public TestColumnDataRandom(int numColumns) {
    RedundantKVGenerator generator = new RedundantKVGenerator();
    ByteRangeSet sorter = new ByteRangeTreeSet();
    for (KeyValue kv : generator.generateTestKeyValues(numColumns)) {
      ByteRange qualifier = new ByteRange(kv.getQualifier());
      inputs.add(qualifier);
      sorter.add(qualifier);
    }
    outputs.addAll(sorter.compile().getSortedRanges());
  }

  @Override
  public List<ByteRange> getInputs() {
    return inputs;
  }

  @Override
  public List<ByteRange> getOutputs() {
    return outputs;
  }
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.column.data;
import java.util.List;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.column.TestColumnData;
import com.google.common.collect.Lists;
public class TestColumnDataSimple implements TestColumnData {

  /** Raw qualifiers in insertion order, including duplicates. */
  @Override
  public List<ByteRange> getInputs() {
    List<String> strings = Lists.newArrayList("abc", "abcde", "abc", "bbc", "abc");
    return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(strings));
  }

  /** De-duplicated, sorted qualifiers. */
  @Override
  public List<ByteRange> getOutputs() {
    List<String> strings = Lists.newArrayList("abc", "abcde", "bbc");
    return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(strings));
  }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import com.google.common.collect.Lists;
public abstract class BaseTestRowData implements TestRowData {

  /**
   * Derives the index of each row's first KeyValue by scanning the inputs
   * for row-key changes. Index 0 is always a row start.
   */
  @Override
  public List<Integer> getRowStartIndexes() {
    List<KeyValue> inputs = getInputs();
    List<Integer> starts = Lists.newArrayList();
    starts.add(0);
    for (int i = 1; i < inputs.size(); ++i) {
      if (!CellComparator.equalsRow(inputs.get(i - 1), inputs.get(i))) {
        starts.add(i);
      }
    }
    return starts;
  }

  /** No per-fixture block-meta assertions by default; subclasses may override. */
  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
  }

  /** No per-fixture searcher assertions by default; subclasses may override. */
  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
  }
}

View File

@ -0,0 +1,191 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
public class TestPrefixTreeSearcher {

  protected static int BLOCK_START = 7;

  /** One parameter set per registered TestRowData fixture. */
  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestRowData.InMemory().getAllAsObjectArray();
  }

  protected TestRowData rows;
  // The fixture's KeyValues, prefix-tree encoded once in the constructor.
  protected ByteBuffer block;

  public TestPrefixTreeSearcher(TestRowData testRows) throws IOException {
    this.rows = testRows;
    ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
    PrefixTreeEncoder kvBuilder = new PrefixTreeEncoder(os, true);
    for (KeyValue kv : rows.getInputs()) {
      kvBuilder.write(kv);
    }
    kvBuilder.flush();
    byte[] outputBytes = os.toByteArray();
    this.block = ByteBuffer.wrap(outputBytes);
  }

  /** Forward-scans the whole block and compares each cell to the input list. */
  @Test
  public void testScanForwards() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);

      int i = -1;
      while (searcher.next()) {
        ++i;
        KeyValue inputCell = rows.getInputs().get(i);
        Cell outputCell = searcher.getCurrent();

        // check all 3 permutations of equals()
        Assert.assertEquals(inputCell, outputCell);
        Assert.assertEquals(outputCell, inputCell);
        Assert.assertTrue(CellComparator.equals(inputCell, outputCell));
      }
      Assert.assertEquals(rows.getInputs().size(), i + 1);
    } finally {
      // Always return the searcher to the factory pool.
      DecoderFactory.checkIn(searcher);
    }
  }

  /** Positions after the last cell, then walks backwards over every cell. */
  @Test
  public void testScanBackwards() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      searcher.positionAfterLastCell();
      int i = -1;
      while (searcher.previous()) {
        ++i;
        // previous() visits cells in reverse, so mirror the input index.
        int oppositeIndex = rows.getInputs().size() - i - 1;
        KeyValue inputKv = rows.getInputs().get(oppositeIndex);
        KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
        Assert.assertEquals(inputKv, outputKv);
      }
      Assert.assertEquals(rows.getInputs().size(), i + 1);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  /** Seeking directly at each existing cell must be an exact hit. */
  @Test
  public void testRandomSeekHits() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      for (KeyValue kv : rows.getInputs()) {
        boolean hit = searcher.positionAt(kv);
        Assert.assertTrue(hit);
        Cell foundKv = searcher.getCurrent();
        Assert.assertTrue(CellComparator.equals(kv, foundKv));
      }
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  /**
   * very hard to test nubs with this thing since the a nextRowKey function will usually skip them
   */
  @Test
  public void testRandomSeekMisses() throws IOException {
    CellSearcher searcher = null;
    List<Integer> rowStartIndexes = rows.getRowStartIndexes();
    try {
      searcher = DecoderFactory.checkOut(block, true);
      for (int i=0; i < rows.getInputs().size(); ++i) {
        KeyValue kv = rows.getInputs().get(i);

        //nextRow
        // Seeking at-or-before the first key of the NEXT row should land
        // somewhere before it when the current key starts a row.
        KeyValue inputNextRow = KeyValueTool.createFirstKeyInNextRow(kv);

        CellScannerPosition position = searcher.positionAtOrBefore(inputNextRow);
        boolean isFirstInRow = rowStartIndexes.contains(i);
        if(isFirstInRow){
          int rowIndex = rowStartIndexes.indexOf(i);
          if(rowIndex < rowStartIndexes.size() - 1){
            // int lastKvInRowI = rowStartIndexes.get(rowIndex + 1) - 1;
            Assert.assertEquals(CellScannerPosition.BEFORE, position);
            /*
             * Can't get this to work between nubs like rowB\x00 <-> rowBB
             *
             * No reason to doubt that it works, but will have to come up with a smarter test.
             */
            // Assert.assertEquals(rows.getInputs().get(lastKvInRowI), searcher.getCurrentCell());
          }
        }

        //previous KV
        // A key just before an existing key must miss on exact positioning,
        // then land AFTER it (or AFTER_LAST at the end of the block).
        KeyValue inputPreviousKv = KeyValueTool.previousKey(kv);
        boolean hit = searcher.positionAt(inputPreviousKv);
        Assert.assertFalse(hit);
        position = searcher.positionAtOrAfter(inputPreviousKv);
        if(CollectionUtils.isLastIndex(rows.getInputs(), i)){
          Assert.assertTrue(CellScannerPosition.AFTER_LAST == position);
        }else{
          Assert.assertTrue(CellScannerPosition.AFTER == position);
          /*
           * TODO: why i+1 instead of i?
           */
          Assert.assertEquals(rows.getInputs().get(i+1), searcher.getCurrent());
        }
      }
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  /** Runs any fixture-specific searcher assertions the TestRowData defines. */
  @Test
  public void testRandomSeekIndividualAssertions() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      rows.individualSearcherAssertions(searcher);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }
}

View File

@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataComplexQualifiers;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDeeper;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDifferentTimestamps;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataEmpty;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataExerciseFInts;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNub;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSimple;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataTrivial;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrls;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import com.google.common.collect.Lists;
/*
* A master class for registering different implementations of TestRowData.
*/
/*
 * A master class for registering different implementations of TestRowData.
 */
public interface TestRowData {

  List<KeyValue> getInputs();

  List<Integer> getRowStartIndexes();

  void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta);

  void individualSearcherAssertions(CellSearcher searcher);

  public static class InMemory {

    /*
     * The following are different styles of data that the codec may encounter. Having these small
     * representations of the data helps pinpoint what is wrong if the encoder breaks.
     */
    public static Collection<TestRowData> getAll() {
      List<TestRowData> fixtures = Lists.newArrayList();
      //simple
      fixtures.add(new TestRowDataEmpty());
      fixtures.add(new TestRowDataTrivial());
      fixtures.add(new TestRowDataSimple());
      fixtures.add(new TestRowDataDeeper());
      //more specific
      fixtures.add(new TestRowDataSingleQualifier());
      // fixtures.add(new TestRowDataMultiFamilies());//multiple families disabled in PrefixTreeEncoder
      fixtures.add(new TestRowDataNub());
      fixtures.add(new TestRowDataSearcherRowMiss());
      fixtures.add(new TestRowDataQualifierByteOrdering());
      fixtures.add(new TestRowDataComplexQualifiers());
      fixtures.add(new TestRowDataDifferentTimestamps());
      //larger data volumes (hard to debug)
      fixtures.add(new TestRowDataNumberStrings());
      fixtures.add(new TestRowDataUrls());
      fixtures.add(new TestRowDataUrlsExample());
      fixtures.add(new TestRowDataExerciseFInts());
      fixtures.add(new TestRowDataRandomKeyValues());
      return fixtures;
    }

    /** Wraps each fixture in an Object[] for JUnit's Parameterized runner. */
    public static Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> parameterSets = Lists.newArrayList();
      for (TestRowData testRows : getAll()) {
        parameterSets.add(new Object[] { testRows });
      }
      return parameterSets;
    }
  }
}

View File

@ -0,0 +1,186 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import com.google.common.collect.Lists;
@RunWith(Parameterized.class)
public class TestRowEncoder {

  // NOTE(review): not referenced inside this class; presumably the offset where encoded data
  // starts when a block header precedes it, or reserved for subclasses — confirm before use
  protected static int BLOCK_START = 7;

  /** One parameter set per registered TestRowData implementation. */
  @Parameters
  public static Collection<Object[]> parameters() {
    List<Object[]> parameters = Lists.newArrayList();
    for (TestRowData testRows : TestRowData.InMemory.getAll()) {
      parameters.add(new Object[] { testRows });
    }
    return parameters;
  }

  // the data set under test for this parameterized run
  protected TestRowData rows;
  protected List<KeyValue> inputKvs;
  protected boolean includeMemstoreTS = true;

  // encoding side
  protected ByteArrayOutputStream os;
  protected PrefixTreeEncoder encoder;
  protected int totalBytes;
  protected PrefixTreeBlockMeta blockMetaWriter;

  // decoding side
  protected byte[] outputBytes;
  protected ByteBuffer buffer;
  protected ByteArrayInputStream is;
  protected PrefixTreeBlockMeta blockMetaReader;
  protected byte[] inputBytes;
  protected PrefixTreeArraySearcher searcher;

  public TestRowEncoder(TestRowData testRows) {
    this.rows = testRows;
  }

  /**
   * Encode the full input data set, then re-read the block meta from the encoded bytes and open
   * a searcher on them.  Runs before every test; the assertions are left to the @Test methods.
   */
  @Before
  public void compile() throws IOException {
    os = new ByteArrayOutputStream(1 << 20);
    encoder = new PrefixTreeEncoder(os, includeMemstoreTS);
    inputKvs = rows.getInputs();
    for (KeyValue kv : inputKvs) {
      encoder.write(kv);
    }
    encoder.flush();
    totalBytes = encoder.getTotalBytes();
    blockMetaWriter = encoder.getBlockMeta();
    outputBytes = os.toByteArray();

    // start reading, but save the assertions for @Test methods
    buffer = ByteBuffer.wrap(outputBytes);
    blockMetaReader = new PrefixTreeBlockMeta(buffer);
    searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(),
      blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength());
    searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS);
  }

  /**
   * The encoder's reported byte count must match the actual output length, and the block meta
   * read back from the bytes must equal the one the writer produced.
   */
  @Test
  public void testEncoderOutput() throws IOException {
    Assert.assertEquals(totalBytes, outputBytes.length);
    Assert.assertEquals(blockMetaWriter, blockMetaReader);
  }

  /** Scan forward through the block, comparing every decoded cell to the matching input cell. */
  @Test
  public void testForwardScanner() {
    int counter = -1;
    while (searcher.next()) {
      ++counter;
      KeyValue inputKv = rows.getInputs().get(counter);
      KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    // assert same number of cells
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  /**
   * probably not needed since testReverseScannerWithJitter() below is more thorough
   */
  @Test
  public void testReverseScanner() {
    searcher.positionAfterLastCell();
    int counter = -1;
    while (searcher.previous()) {
      ++counter;
      // walking backward, so compare against the inputs from the end
      int oppositeIndex = rows.getInputs().size() - counter - 1;
      KeyValue inputKv = rows.getInputs().get(oppositeIndex);
      KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  /**
   * Exercise the nubCellsRemain variable by calling next+previous.  NubCellsRemain is basically
   * a special fan index.
   */
  @Test
  public void testReverseScannerWithJitter() {
    searcher.positionAfterLastCell();
    int counter = -1;
    while (true) {
      boolean foundCell = searcher.previous();
      if (!foundCell) {
        break;
      }
      ++counter;

      // a next+previous should cancel out
      if (!searcher.isAfterLast()) {
        searcher.next();
        searcher.previous();
      }

      int oppositeIndex = rows.getInputs().size() - counter - 1;
      KeyValue inputKv = rows.getInputs().get(oppositeIndex);
      KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  /** Delegate data-set-specific block meta checks to the TestRowData implementation. */
  @Test
  public void testIndividualBlockMetaAssertions() {
    rows.individualBlockMetaAssertions(blockMetaReader);
  }

  /**************** helper **************************/

  protected void assertKeyAndValueEqual(Cell expected, Cell actual) {
    // assert keys are equal (doesn't compare values)
    Assert.assertEquals(expected, actual);
    if (includeMemstoreTS) {
      Assert.assertEquals(expected.getMvccVersion(), actual.getMvccVersion());
    }
    // assert values equal
    Assert.assertTrue(Bytes.equals(expected.getValueArray(), expected.getValueOffset(),
      expected.getValueLength(), actual.getValueArray(), actual.getValueOffset(),
      actual.getValueLength()));
  }
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/**
 * Single row whose qualifiers are chosen to build fan-outs, nubs, and leaves in the
 * qualifier trie.
 */
public class TestRowDataComplexQualifiers extends BaseTestRowData {

  static byte[]
    Arow = Bytes.toBytes("Arow"),
    cf = PrefixTreeTestConstants.TEST_CF,
    v0 = Bytes.toBytes("v0");

  static List<byte[]> qualifiers = Lists.newArrayList();
  static {
    String[] qualifierStrings = {
        "cq",
        "cq0",
        "cq1",
        "cq2",
        "dq0",      // second root level fan
        "dq1",      // nub
        "dq111",    // leaf on nub
        "dq11111a", // leaf on leaf
    };
    for (String qualifierString : qualifierStrings) {
      qualifiers.add(Bytes.toBytes(qualifierString));
    }
  }

  static long ts = 55L;

  // one cell per qualifier; row, family, timestamp, and value are identical throughout
  static List<KeyValue> d = Lists.newArrayList();
  static {
    for (byte[] qualifier : qualifiers) {
      d.add(new KeyValue(Arow, cf, qualifier, ts, v0));
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;
import com.google.common.collect.Lists;
/*
 * Goes beyond a trivial trie to add a branch on the "cf" node
 */
public class TestRowDataDeeper extends BaseTestRowData{

  // rows cdc, cf6, cfc branch once at "c" (d vs f) and again under "cf" (6 vs c)
  static byte[]
    cdc = Bytes.toBytes("cdc"),
    cf6 = Bytes.toBytes("cf6"),
    cfc = Bytes.toBytes("cfc"),
    f = Bytes.toBytes("f"),
    q = Bytes.toBytes("q"),
    v = Bytes.toBytes("v");

  static long
    ts = 55L;

  // one cell per row; family/qualifier/timestamp/value identical so only the rows differ
  static List<KeyValue> d = Lists.newArrayList();
  static{
    d.add(new KeyValue(cdc, f, q, ts, v));
    d.add(new KeyValue(cf6, f, q, ts, v));
    d.add(new KeyValue(cfc, f, q, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    //0: token:c; fan:d,f
    //1: token:f; fan:6,c
    //2: leaves
    Assert.assertEquals(3, blockMeta.getRowTreeDepth());
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    /**
     * Position at-or-after a fake first-on-row key for "cfc".  The searcher should report
     * AFTER and land on the real cell in row "cfc" (d.get(2)); stepping backward should then
     * reach the cell in the previous row "cf6" (d.get(1)).
     */
    KeyValue cfcRow = KeyValue.createFirstOnRow(Bytes.toBytes("cfc"));
    CellScannerPosition position = searcher.positionAtOrAfter(cfcRow);
    Assert.assertEquals(CellScannerPosition.AFTER, position);
    Assert.assertEquals(d.get(2), searcher.getCurrent());
    searcher.previous();
    Assert.assertEquals(d.get(1), searcher.getCurrent());
  }
}

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.junit.Assert;
import com.google.common.collect.Lists;
/*
 * test different timestamps
 */
public class TestRowDataDifferentTimestamps extends BaseTestRowData {

  static byte[]
    Arow = Bytes.toBytes("Arow"),
    Brow = Bytes.toBytes("Brow"),
    cf = Bytes.toBytes("fammy"),
    cq0 = Bytes.toBytes("cq0"),
    cq1 = Bytes.toBytes("cq1"),
    v0 = Bytes.toBytes("v0");

  static List<KeyValue> d = Lists.newArrayList();
  static {
    // a spread of timestamps and mvccVersions so the encoder cannot collapse either column
    KeyValue kv0 = cell(Arow, cq0, 0L);
    kv0.setMvccVersion(123456789L);
    d.add(kv0);

    KeyValue kv1 = cell(Arow, cq1, 1L);
    kv1.setMvccVersion(3L);
    d.add(kv1);

    KeyValue kv2 = cell(Brow, cq0, 12345678L);
    kv2.setMvccVersion(65537L);
    d.add(kv2);

    //watch out... Long.MAX_VALUE comes back as 1332221664203, even with other encoders
    // d.add(new KeyValue(Brow, cf, cq1, Long.MAX_VALUE, v0));
    KeyValue kv3 = cell(Brow, cq1, Long.MAX_VALUE - 1);
    kv3.setMvccVersion(1L);
    d.add(kv3);

    //don't set memstoreTS
    KeyValue kv4 = cell(Brow, cq1, 999999999L);
    d.add(kv4);

    KeyValue kv5 = cell(Brow, cq1, 12345L);
    kv5.setMvccVersion(0L);
    d.add(kv5);
  }

  /** Build a cell on the shared family/value with the given row, qualifier, and timestamp. */
  private static KeyValue cell(byte[] row, byte[] qualifier, long timestamp) {
    return new KeyValue(row, cf, qualifier, timestamp, v0);
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    Assert.assertTrue(blockMeta.getNumMvccVersionBytes() > 0);
    Assert.assertEquals(12, blockMeta.getNumValueBytes());

    // timestamps differ, so index/delta widths must be non-trivial
    Assert.assertFalse(blockMeta.isAllSameTimestamp());
    Assert.assertNotNull(blockMeta.getMinTimestamp());
    Assert.assertTrue(blockMeta.getTimestampIndexWidth() > 0);
    Assert.assertTrue(blockMeta.getTimestampDeltaWidth() > 0);

    // mvccVersions differ too
    Assert.assertFalse(blockMeta.isAllSameMvccVersion());
    Assert.assertNotNull(blockMeta.getMinMvccVersion());
    Assert.assertTrue(blockMeta.getMvccVersionIndexWidth() > 0);
    Assert.assertTrue(blockMeta.getMvccVersionDeltaWidth() > 0);
  }
}

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/**
 * Degenerate data set: a single Put whose row, family, qualifier, and value are all empty.
 */
public class TestRowDataEmpty extends BaseTestRowData {

  // zero-length array reused for every component of the cell
  private static final byte[] EMPTY = new byte[0];

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(EMPTY, EMPTY, EMPTY, 0L, Type.Put, EMPTY));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;
import com.google.common.collect.Lists;
/*
 * Bulk data set with enough rows and long qualifiers that the block's fixed-width offset
 * integers (fints) need more than a single byte each, which individualBlockMetaAssertions
 * verifies below.
 *
 * http://pastebin.com/7ks8kzJ2
 * http://pastebin.com/MPn03nsK
 */
public class TestRowDataExerciseFInts extends BaseTestRowData{

  static List<ByteRange> rows;
  static{
    List<String> rowStrings = new ArrayList<String>();
    rowStrings.add("com.edsBlog/directoryAa/pageAaa");
    rowStrings.add("com.edsBlog/directoryAa/pageBbb");
    rowStrings.add("com.edsBlog/directoryAa/pageCcc");
    rowStrings.add("com.edsBlog/directoryAa/pageDdd");
    rowStrings.add("com.edsBlog/directoryBb/pageEee");
    rowStrings.add("com.edsBlog/directoryBb/pageFff");
    rowStrings.add("com.edsBlog/directoryBb/pageGgg");
    rowStrings.add("com.edsBlog/directoryBb/pageHhh");
    rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
    rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
    rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
    rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
    rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
    rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
    rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
    rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
    // presumably de-duplicates and sorts the rows before use — confirm against ByteRangeTreeSet
    ByteRangeTreeSet ba = new ByteRangeTreeSet();
    for(String row : rowStrings){
      ba.add(new ByteRange(Bytes.toBytes(row)));
    }
    rows = ba.compile().getSortedRanges();
  }

  // qualifiers include several very long strings so qualifier offsets exceed one byte
  static List<String> cols = Lists.newArrayList();
  static{
    cols.add("Chrome");
    cols.add("Chromeb");
    cols.add("Firefox");
    cols.add("InternetExplorer");
    cols.add("Opera");
    cols.add("Safari");
    cols.add("Z1stBrowserWithHuuuuuuuuuuuugeQualifier");
    cols.add("Z2ndBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z3rdBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z4thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z5thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z6thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z7thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z8thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z9thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
  }

  static long ts = 1234567890;

  static int MAX_VALUE = 50;

  // full cross product: one cell per (row, qualifier) pair
  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (ByteRange row : rows) {
      for (String col : cols) {
        KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
          Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
        kvs.add(kv);
      }
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    // the point of this data set: both fint widths must exceed one byte
    Assert.assertTrue(blockMeta.getNextNodeOffsetWidth() > 1);
    Assert.assertTrue(blockMeta.getQualifierOffsetWidth() > 1);
  }
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/**
 * Two rows spread across three column families (famB is a prefix of famBB).  Currently
 * unregistered in TestRowData.InMemory because multiple families are disabled in the encoder.
 */
public class TestRowDataMultiFamilies extends BaseTestRowData {

  static byte[] rowA = Bytes.toBytes("rowA");
  static byte[] rowB = Bytes.toBytes("rowB");
  static byte[] famA = Bytes.toBytes("famA");
  static byte[] famB = Bytes.toBytes("famB");
  static byte[] famBB = Bytes.toBytes("famBB");
  static byte[] q0 = Bytes.toBytes("q0");
  static byte[] q1 = Bytes.toBytes("q1");//start with a different character
  static byte[] vvv = Bytes.toBytes("vvv");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    // rowA: one cell in each family
    d.add(new KeyValue(rowA, famA, q0, ts, vvv));
    d.add(new KeyValue(rowA, famB, q1, ts, vvv));
    d.add(new KeyValue(rowA, famBB, q0, ts, vvv));
    // rowB: multiple qualifiers inside some families
    d.add(new KeyValue(rowB, famA, q0, ts, vvv));
    d.add(new KeyValue(rowB, famA, q1, ts, vvv));
    d.add(new KeyValue(rowB, famB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famBB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famBB, q1, ts, vvv));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/**
 * Rows where "rowB" is both a row of its own and a prefix of "rowBB", producing a nub in the
 * row trie.
 */
public class TestRowDataNub extends BaseTestRowData {

  static byte[] rowA = Bytes.toBytes("rowA");
  static byte[] rowB = Bytes.toBytes("rowB");//nub
  static byte[] rowBB = Bytes.toBytes("rowBB");
  static byte[] cf = PrefixTreeTestConstants.TEST_CF;
  static byte[] cq0 = Bytes.toBytes("cq0");
  static byte[] cq1 = Bytes.toBytes("cq1");
  static byte[] v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    // two qualifiers per row, rows in ascending order
    for (byte[] row : new byte[][] { rowA, rowB, rowBB }) {
      d.add(new KeyValue(row, cf, cq0, ts, v0));
      d.add(new KeyValue(row, cf, cq1, ts, v0));
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
public class TestRowDataNumberStrings extends BaseTestRowData {

  static List<KeyValue> d = Lists.newArrayList();
  static {
    /**
     * Test a string-encoded list of numbers.  As strings, 0, 1, 10, 11 keep that same order.
     * <p/>
     * This helped catch a bug with reverse scanning where it was jumping from the last leaf cell
     * to the previous nub.  It should do 11->10, but it was incorrectly doing 11->1
     */
    byte[] family = Bytes.toBytes("F");
    byte[] column = Bytes.toBytes("C");
    byte[] value = Bytes.toBytes("V");
    for (Integer number : Lists.newArrayList(0, 1, 10, 11)) {
      d.add(new KeyValue(Bytes.toBytes("" + number), family, column, 0L, Type.Put, value));
    }
    // ensure cell order regardless of the order the numbers were listed in
    Collections.sort(d, new CellComparator());
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/**
 * Rows with differing qualifier subsets, to verify qualifiers come back in byte order.
 */
public class TestRowDataQualifierByteOrdering extends BaseTestRowData {

  static byte[] Arow = Bytes.toBytes("Arow");
  static byte[] Brow = Bytes.toBytes("Brow");
  static byte[] Brow2 = Bytes.toBytes("Brow2");
  static byte[] fam = Bytes.toBytes("HappyFam");
  static byte[] cq0 = Bytes.toBytes("cq0");
  static byte[] cq1 = Bytes.toBytes("cq1tail");//make sure tail does not come back as liat
  static byte[] cq2 = Bytes.toBytes("cq2");
  static byte[] v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    // each row uses a different pair of the three qualifiers
    d.add(new KeyValue(Arow, fam, cq0, ts, v0));
    d.add(new KeyValue(Arow, fam, cq1, ts, v0));
    d.add(new KeyValue(Brow, fam, cq0, ts, v0));
    d.add(new KeyValue(Brow, fam, cq2, ts, v0));
    d.add(new KeyValue(Brow2, fam, cq1, ts, v0));
    d.add(new KeyValue(Brow2, fam, cq2, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/**
 * Bulk data set of 1024 generated KeyValues.  RedundantKVGenerator presumably produces
 * repetitive rows/qualifiers suited to prefix encoding — confirm against its implementation.
 */
public class TestRowDataRandomKeyValues extends BaseTestRowData {

  static RedundantKVGenerator generator = new RedundantKVGenerator();

  // Populate directly: the previous code initialized d to an empty Lists.newArrayList() that
  // the static block immediately discarded by reassigning d.
  static List<KeyValue> d = generator.generateTestKeyValues(1 << 10);

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,123 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;
import com.google.common.collect.Lists;
public class TestRowDataSearcherRowMiss extends BaseTestRowData{

  static byte[]
    //rows deliberately share leading bytes (A < AA < AAA < B) so that probe keys falling
    //between rows miss and the searcher must settle on a neighboring cell
    A = Bytes.toBytes("A"),
    AA = Bytes.toBytes("AA"),
    AAA = Bytes.toBytes("AAA"),
    B = Bytes.toBytes("B"),
    cf = Bytes.toBytes("fam"),
    cq = Bytes.toBytes("cq0"),
    v = Bytes.toBytes("v0");

  static long
    ts = 55L;

  // one cell per row
  static List<KeyValue> d = Lists.newArrayList();
  static{
    d.add(new KeyValue(A, cf, cq, ts, v));
    d.add(new KeyValue(AA, cf, cq, ts, v));
    d.add(new KeyValue(AAA, cf, cq, ts, v));
    d.add(new KeyValue(B, cf, cq, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    assertRowOffsetsCorrect();

    searcher.resetToBeforeFirstEntry();

    //test first cell
    searcher.next();
    Cell first = searcher.getCurrent();
    Assert.assertTrue(CellComparator.equals(d.get(0), first));

    //test first cell in second row
    Assert.assertTrue(searcher.positionAt(d.get(1)));
    Assert.assertTrue(CellComparator.equals(d.get(1), searcher.getCurrent()));

    testBetween1and2(searcher);
    testBetween2and3(searcher);
  }

  /************ private methods, call from above *******************/

  // every cell is its own row, so there must be one row start index per cell
  private void assertRowOffsetsCorrect(){
    Assert.assertEquals(4, getRowStartIndexes().size());
  }

  /**
   * Probe with a key (row AA, timestamp ts-2) that sorts between d.get(1) and d.get(2):
   * exact positioning must fail, atOrBefore must land BEFORE on d.get(1), and atOrAfter must
   * land AFTER on d.get(2).
   */
  private void testBetween1and2(CellSearcher searcher){
    CellScannerPosition p;//reuse
    Cell betweenAAndAAA = new KeyValue(AA, cf, cq, ts-2, v);

    //test exact
    Assert.assertFalse(searcher.positionAt(betweenAAndAAA));

    //test atOrBefore
    p = searcher.positionAtOrBefore(betweenAAndAAA);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(1)));

    //test atOrAfter
    p = searcher.positionAtOrAfter(betweenAAndAAA);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(2)));
  }

  /**
   * Same probe pattern with a key (row AAA, timestamp ts-2) that sorts between d.get(2) and
   * d.get(3).
   */
  private void testBetween2and3(CellSearcher searcher){
    CellScannerPosition p;//reuse
    Cell betweenAAAndB = new KeyValue(AAA, cf, cq, ts-2, v);

    //test exact
    Assert.assertFalse(searcher.positionAt(betweenAAAndB));

    //test atOrBefore
    p = searcher.positionAtOrBefore(betweenAAAndB);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(2)));

    //test atOrAfter
    p = searcher.positionAtOrAfter(betweenAAAndB);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(3)));
  }
}

View File

@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;
import com.google.common.collect.Lists;
/**
 * Two rows with no shared prefix bytes, three qualifiers each (one with a long tail, one
 * starting with a different character) to exercise qualifier-trie branching. Also asserts
 * searcher behavior for probe keys between, before, and after the stored cells.
 */
public class TestRowDataSimple extends BaseTestRowData {
  static byte[]
  // don't let the rows share any common prefix bytes
  rowA = Bytes.toBytes("Arow"),
  rowB = Bytes.toBytes("Brow"), cf = Bytes.toBytes("fam"),
  cq0 = Bytes.toBytes("cq0"),
  cq1 = Bytes.toBytes("cq1tail"),// qualifier with extra tail bytes past the shared "cq" prefix
  cq2 = Bytes.toBytes("dcq2"),// start with a different character
  v0 = Bytes.toBytes("v0");
  static long ts = 55L;
  // six cells: d.get(0..2) belong to rowA, d.get(3..5) to rowB
  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, cf, cq0, ts, v0));
    d.add(new KeyValue(rowA, cf, cq1, ts, v0));
    d.add(new KeyValue(rowA, cf, cq2, ts, v0));
    d.add(new KeyValue(rowB, cf, cq0, ts, v0));
    d.add(new KeyValue(rowB, cf, cq1, ts, v0));
    d.add(new KeyValue(rowB, cf, cq2, ts, v0));
  }
  /** Returns the static, pre-built cells this data set encodes. */
  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    CellScannerPosition p;// reuse
    searcher.resetToBeforeFirstEntry();
    // test first cell
    searcher.next();
    Cell first = searcher.getCurrent();
    Assert.assertTrue(CellComparator.equals(d.get(0), first));
    // test first cell in second row
    Assert.assertTrue(searcher.positionAt(d.get(3)));
    Assert.assertTrue(CellComparator.equals(d.get(3), searcher.getCurrent()));
    // probe key sorting between d.get(4) and d.get(5): same row/col, older timestamp
    Cell between4And5 = new KeyValue(rowB, cf, cq1, ts - 2, v0);
    // test exact: not stored, so positionAt must miss
    Assert.assertFalse(searcher.positionAt(between4And5));
    // test atOrBefore
    p = searcher.positionAtOrBefore(between4And5);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(4)));
    // test atOrAfter
    p = searcher.positionAtOrAfter(between4And5);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(5)));
    // test when key falls before first key in block
    Cell beforeFirst = new KeyValue(Bytes.toBytes("A"), cf, cq0, ts, v0);
    Assert.assertFalse(searcher.positionAt(beforeFirst));
    p = searcher.positionAtOrBefore(beforeFirst);
    Assert.assertEquals(CellScannerPosition.BEFORE_FIRST, p);
    p = searcher.positionAtOrAfter(beforeFirst);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(0)));
    Assert.assertEquals(d.get(0), searcher.getCurrent());
    // test when key falls after last key in block
    Cell afterLast = new KeyValue(Bytes.toBytes("z"), cf, cq0, ts, v0);// must be lower case z
    Assert.assertFalse(searcher.positionAt(afterLast));
    p = searcher.positionAtOrAfter(afterLast);
    Assert.assertEquals(CellScannerPosition.AFTER_LAST, p);
    p = searcher.positionAtOrBefore(afterLast);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), CollectionUtils.getLast(d)));
  }
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/**
 * Minimal data set: two rows ("rowA", "rowB") with a single qualifier each, all sharing the
 * same family, timestamp, and value.
 */
public class TestRowDataSingleQualifier extends BaseTestRowData{
  static byte[] rowA = Bytes.toBytes("rowA");
  static byte[] rowB = Bytes.toBytes("rowB");
  static byte[] cf = PrefixTreeTestConstants.TEST_CF;
  static byte[] cq0 = Bytes.toBytes("cq0");
  static byte[] v0 = Bytes.toBytes("v0");
  static long ts = 55L;
  // one cell per row, added in row order
  static List<KeyValue> d = Lists.newArrayList();
  static {
    for (byte[] row : new byte[][] { rowA, rowB }) {
      d.add(new KeyValue(row, cf, cq0, ts, v0));
    }
  }
  /** Returns the static, pre-built cells this data set encodes. */
  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;
import com.google.common.collect.Lists;
/**
 * Two-row data set ("rA", "rB") whose shared "r" prefix forces the row trie to branch,
 * producing a root node plus two leaves.
 */
public class TestRowDataTrivial extends BaseTestRowData{
  static byte[]
  rA = Bytes.toBytes("rA"),
  rB = Bytes.toBytes("rB"),//turn "r" into a branch for the Searcher tests
  cf = Bytes.toBytes("fam"),
  cq0 = Bytes.toBytes("q0"),
  v0 = Bytes.toBytes("v0");
  static long ts = 55L;
  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rA, cf, cq0, ts, v0));
    d.add(new KeyValue(rB, cf, cq0, ts, v0));
  }
  /** Returns the static, pre-built cells this data set encodes. */
  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    // node[0] -> root[r]
    // node[1] -> leaf[A], etc
    Assert.assertEquals(2, blockMeta.getRowTreeDepth());
  }
  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    /**
     * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only rA,
     * but rB as well.
     */
    KeyValue afterLast = KeyValue.createFirstOnRow(Bytes.toBytes("zzz"));
    CellScannerPosition position = searcher.positionAtOrAfter(afterLast);
    Assert.assertEquals(CellScannerPosition.AFTER_LAST, position);
    // past the last cell, there is nothing for the searcher to sit on
    Assert.assertNull(searcher.getCurrent());
  }
}

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
import com.google.common.collect.Lists;
/*
* test different timestamps
*
* http://pastebin.com/7ks8kzJ2
* http://pastebin.com/MPn03nsK
*/
/**
 * URL-style row keys (two blogs x two directories x four pages) crossed with browser-name
 * qualifiers. Row keys are deduplicated and sorted through a ByteRangeTreeSet before the
 * KeyValues are built (static-init order matters: rows must exist before kvs).
 */
public class TestRowDataUrls extends BaseTestRowData{
  static List<ByteRange> rows;
  static{
    List<String> rowStrings = new ArrayList<String>();
    rowStrings.add("com.edsBlog/directoryAa/pageAaa");
    rowStrings.add("com.edsBlog/directoryAa/pageBbb");
    rowStrings.add("com.edsBlog/directoryAa/pageCcc");
    rowStrings.add("com.edsBlog/directoryAa/pageDdd");
    rowStrings.add("com.edsBlog/directoryBb/pageEee");
    rowStrings.add("com.edsBlog/directoryBb/pageFff");
    rowStrings.add("com.edsBlog/directoryBb/pageGgg");
    rowStrings.add("com.edsBlog/directoryBb/pageHhh");
    rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
    rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
    rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
    rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
    rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
    rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
    rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
    rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
    ByteRangeTreeSet ba = new ByteRangeTreeSet();
    for (String row : rowStrings) {
      ba.add(new ByteRange(Bytes.toBytes(row)));
    }
    // compile() finalizes the set; getSortedRanges() hands back the rows in sorted order
    rows = ba.compile().getSortedRanges();
  }
  static List<String> cols = Lists.newArrayList();
  static {
    cols.add("Chrome");
    cols.add("Chromeb");// near-duplicate of "Chrome" to exercise qualifier branching
    cols.add("Firefox");
    cols.add("InternetExplorer");
    cols.add("Opera");
    cols.add("Safari");
  }
  static long ts = 1234567890;
  static int MAX_VALUE = 50;
  // cross product: one Put KeyValue per (row, column) pair
  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (ByteRange row : rows) {
      for (String col : cols) {
        KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
            Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
        kvs.add(kv);
        // System.out.println("TestRows5:"+kv);
      }
    }
  }
  /** Returns the static, pre-built cells this data set encodes. */
  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }
}

View File

@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.row.data;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hbase.codec.prefixtree.encode.column.ColumnNodeWriter;
import org.apache.hbase.codec.prefixtree.encode.row.RowNodeWriter;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import com.google.common.collect.Lists;
/*
* test different timestamps
*
* http://pastebin.com/7ks8kzJ2
* http://pastebin.com/MPn03nsK
*/
/**
 * Small URL/browser data set used both as test input and, via {@link #main}, to print the
 * encoder's internal trie structures for documentation purposes.
 */
public class TestRowDataUrlsExample extends BaseTestRowData{
  static String TENANT_ID = Integer.toString(95322);
  static String APP_ID = Integer.toString(12);
  static List<String> URLS = Lists.newArrayList(
      "com.dablog/2011/10/04/boating",
      "com.dablog/2011/10/09/lasers",
      "com.jamiesrecipes", //this nub helped find a bug
      "com.jamiesrecipes/eggs");
  static String FAMILY = "hits";
  static List<String> BROWSERS = Lists.newArrayList(
      "Chrome", "IE8", "IE9beta");//, "Opera", "Safari");
  static long TIMESTAMP = 1234567890;
  static int MAX_VALUE = 50;
  // cross product: one Put KeyValue per (url, browser) pair
  static List<KeyValue> kvs = Lists.newArrayList();
  static{
    for(String rowKey : URLS){
      for(String qualifier : BROWSERS){
        KeyValue kv = new KeyValue(
            Bytes.toBytes(rowKey),
            Bytes.toBytes(FAMILY),
            Bytes.toBytes(qualifier),
            TIMESTAMP,
            KeyValue.Type.Put,
            Bytes.toBytes("VvvV"));
        kvs.add(kv);
      }
    }
  }
  /**
   * Used for generating docs.
   */
  public static void main(String... args) throws IOException{
    System.out.println("-- inputs --");
    System.out.println(KeyValueTestUtil.toStringWithPadding(kvs, true));
    ByteArrayOutputStream os = new ByteArrayOutputStream(1<<20);
    PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false);
    // feed every cell through the encoder, then flush to finalize its structures
    for(KeyValue kv : kvs){
      encoder.write(kv);
    }
    encoder.flush();
    // dump the qualifier trie: branch nodes first, then leaves
    System.out.println("-- qualifier SortedPtBuilderNodes --");
    for(TokenizerNode tokenizer : encoder.getQualifierWriter().getNonLeaves()){
      System.out.println(tokenizer);
    }
    for(TokenizerNode tokenizerNode : encoder.getQualifierWriter().getLeaves()){
      System.out.println(tokenizerNode);
    }
    System.out.println("-- qualifier PtColumnNodeWriters --");
    for(ColumnNodeWriter writer : encoder.getQualifierWriter().getColumnNodeWriters()){
      System.out.println(writer);
    }
    // dump the row trie: branch nodes first, then leaves
    System.out.println("-- rowKey SortedPtBuilderNodes --");
    for(TokenizerNode tokenizerNode : encoder.getRowWriter().getNonLeaves()){
      System.out.println(tokenizerNode);
    }
    for(TokenizerNode tokenizerNode : encoder.getRowWriter().getLeaves()){
      System.out.println(tokenizerNode);
    }
    System.out.println("-- row PtRowNodeWriters --");
    for(RowNodeWriter writer : encoder.getRowWriter().getNonLeafWriters()){
      System.out.println(writer);
    }
    for(RowNodeWriter writer : encoder.getRowWriter().getLeafWriters()){
      System.out.println(writer);
    }
    System.out.println("-- concatenated values --");
    System.out.println(Bytes.toStringBinary(encoder.getValueByteRange().deepCopyToNewArray()));
  }
  /** Returns the static, pre-built cells this data set encodes. */
  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.timestamp;
import java.util.Collection;
import java.util.List;
import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataBasic;
import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataNumbers;
import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataRepeats;
import com.google.common.collect.Lists;
/**
 * Contract for a timestamp test fixture: the raw input timestamps, the expected minimum, and
 * the expected output timestamps after encoding.
 */
public interface TestTimestampData {
  List<Long> getInputs();
  long getMinimum();
  List<Long> getOutputs();
  /** Supplies every fixture implementation as Parameterized-runner argument arrays. */
  public static class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      TestTimestampData[] fixtures = {
          new TestTimestampDataBasic(),
          new TestTimestampDataNumbers(),
          new TestTimestampDataRepeats() };
      for (TestTimestampData fixture : fixtures) {
        all.add(new Object[] { fixture });
      }
      return all;
    }
  }
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.timestamp;
import java.io.IOException;
import java.util.Collection;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
public class TestTimestampEncoder {
@Parameters
public static Collection<Object[]> parameters() {
return new TestTimestampData.InMemory().getAllAsObjectArray();
}
private TestTimestampData timestamps;
private PrefixTreeBlockMeta blockMeta;
private LongEncoder encoder;
private byte[] bytes;
private TimestampDecoder decoder;
public TestTimestampEncoder(TestTimestampData testTimestamps) throws IOException {
this.timestamps = testTimestamps;
this.blockMeta = new PrefixTreeBlockMeta();
this.blockMeta.setNumMetaBytes(0);
this.blockMeta.setNumRowBytes(0);
this.blockMeta.setNumQualifierBytes(0);
this.encoder = new LongEncoder();
for (Long ts : testTimestamps.getInputs()) {
encoder.add(ts);
}
encoder.compile();
blockMeta.setTimestampFields(encoder);
bytes = encoder.getByteArray();
decoder = new TimestampDecoder();
decoder.initOnBlock(blockMeta, bytes);
}
@Test
public void testCompressorMinimum() {
Assert.assertEquals(timestamps.getMinimum(), encoder.getMin());
}
@Test
public void testCompressorRoundTrip() {
long[] outputs = encoder.getSortedUniqueTimestamps();
for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
long input = timestamps.getOutputs().get(i);
long output = outputs[i];
Assert.assertEquals(input, output);
}
}
@Test
public void testReaderMinimum() {
Assert.assertEquals(timestamps.getMinimum(), decoder.getLong(0));
}
@Test
public void testReaderRoundTrip() {
for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
long input = timestamps.getOutputs().get(i);
long output = decoder.getLong(i);
Assert.assertEquals(input, output);
}
}
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.timestamp.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData;
/**
 * Basic timestamp fixture: five inputs containing one duplicate; the expected outputs are the
 * distinct input values in ascending order.
 */
public class TestTimestampDataBasic implements TestTimestampData {
  @Override
  public List<Long> getInputs() {
    List<Long> in = new ArrayList<Long>();
    for (long t : new long[] { 5, 3, 0, 1, 3 }) {
      in.add(t);
    }
    return in;
  }
  @Override
  public long getMinimum() {
    // smallest value among the inputs
    return 0L;
  }
  @Override
  public List<Long> getOutputs() {
    // distinct inputs, ascending
    List<Long> out = new ArrayList<Long>();
    for (long t : new long[] { 0, 1, 3, 5 }) {
      out.add(t);
    }
    return out;
  }
}

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.timestamp.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData;
/**
 * Timestamp fixture with all values shifted left by 8 bits, exercising larger magnitudes;
 * outputs are the distinct inputs in ascending order.
 */
public class TestTimestampDataNumbers implements TestTimestampData {
  private int shift = 8;
  @Override
  public List<Long> getInputs() {
    List<Long> in = new ArrayList<Long>();
    for (long base : new long[] { 5, 3, 7, 1, 3 }) {
      in.add(base << shift);
    }
    return in;
  }
  @Override
  public long getMinimum() {
    // smallest input is 1 << shift
    return 1L << shift;
  }
  @Override
  public List<Long> getOutputs() {
    // distinct inputs, ascending
    List<Long> out = new ArrayList<Long>();
    for (long base : new long[] { 1, 3, 5, 7 }) {
      out.add(base << shift);
    }
    return out;
  }
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.codec.prefixtree.timestamp.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData;
/**
 * Degenerate timestamp fixture: five identical inputs. The expected output list is empty —
 * presumably because the encoder stores no values beyond the minimum when all inputs match;
 * confirm against LongEncoder.
 */
public class TestTimestampDataRepeats implements TestTimestampData {
  private static long t = 1234567890L;
  @Override
  public List<Long> getInputs() {
    List<Long> in = new ArrayList<Long>();
    for (int i = 0; i < 5; ++i) {
      in.add(t);
    }
    return in;
  }
  @Override
  public long getMinimum() {
    return t;
  }
  @Override
  public List<Long> getOutputs() {
    // no outputs expected
    return new ArrayList<Long>();
  }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.bytes;
import junit.framework.Assert;
import org.apache.hadoop.hbase.util.ByteRange;
import org.junit.Test;
/** Smoke test for ByteRange construction. */
public class TestByteRange {
  @Test
  public void testConstructor() {
    // wrapping a 3-byte array should produce a range spanning all 3 bytes
    ByteRange b = new ByteRange(new byte[] { 0, 1, 2 });
    Assert.assertEquals(3, b.getLength());
  }
}

View File

@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.comparator;
import java.util.Comparator;
import org.apache.hadoop.hbase.util.Bytes;
/** Comparator adapter exposing Bytes.compareTo's byte[] ordering through java.util.Comparator. */
public class ByteArrayComparator implements Comparator<byte[]> {
  @Override
  public int compare(byte[] a, byte[] b) {
    // delegates entirely to Bytes.compareTo — see that method for the exact ordering contract
    return Bytes.compareTo(a, b);
  }
}

View File

@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.number;
import java.text.DecimalFormat;
/** Formats numbers with grouping separators for human-readable output. */
public class NumberFormatter {
  /** Grouping pattern; the fractional part allows up to 21 digits (biggest long is 19 digits). */
  private static final String COMMA_FORMAT =
      "###,###,###,###,###,###,###,###.#####################";

  /**
   * Renders the given number with grouping separators, or returns null for a null input.
   * A fresh DecimalFormat is built per call because DecimalFormat is not thread-safe.
   */
  public static String addCommas(final Number pValue) {
    if (pValue == null) {
      return null;
    }
    return new DecimalFormat(COMMA_FORMAT).format(pValue);
  }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.number;
import java.util.Random;
/** Helpers for drawing random numbers with constrained ranges. */
public class RandomNumberUtils {
  /**
   * Draws longs from the given generator until one is strictly positive and returns it.
   * Zero and negative draws are discarded, so the result is always > 0.
   */
  public static long nextPositiveLong(Random random) {
    long candidate;
    do {
      candidate = random.nextLong();
    } while (candidate <= 0);
    return candidate;
  }
}

View File

@ -0,0 +1,122 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.vint;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.junit.Assert;
import org.junit.Test;
/********************** tests *************************/
public class TestFIntTool {
/** Sanity-checks JDK Long.numberOfLeadingZeros at the boundary values this tool depends on. */
@Test
public void testLeadingZeros() {
  Assert.assertEquals(64, Long.numberOfLeadingZeros(0));
  Assert.assertEquals(63, Long.numberOfLeadingZeros(1));
  // any negative long has its sign bit set, so zero leading zeros
  Assert.assertEquals(0, Long.numberOfLeadingZeros(Long.MIN_VALUE));
  Assert.assertEquals(0, Long.numberOfLeadingZeros(-1));
  Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE));
  Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE - 1));
}
/** The largest unsigned value representable in N bytes is 2^(8N) - 1. */
@Test
public void testMaxValueForNumBytes() {
  Assert.assertEquals(255, UFIntTool.maxValueForNumBytes(1));
  Assert.assertEquals(65535, UFIntTool.maxValueForNumBytes(2));
  Assert.assertEquals(0xffffff, UFIntTool.maxValueForNumBytes(3));
  Assert.assertEquals(0xffffffffffffffL, UFIntTool.maxValueForNumBytes(7));
}
/** Minimum byte count needed per value; width steps up at each power of 256. */
@Test
public void testNumBytes() {
  Assert.assertEquals(1, UFIntTool.numBytes(0));
  Assert.assertEquals(1, UFIntTool.numBytes(1));
  Assert.assertEquals(1, UFIntTool.numBytes(255));
  Assert.assertEquals(2, UFIntTool.numBytes(256));
  Assert.assertEquals(2, UFIntTool.numBytes(65535));
  Assert.assertEquals(3, UFIntTool.numBytes(65536));
  Assert.assertEquals(4, UFIntTool.numBytes(0xffffffffL));
  Assert.assertEquals(5, UFIntTool.numBytes(0x100000000L));
  Assert.assertEquals(4, UFIntTool.numBytes(Integer.MAX_VALUE));
  Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE));
  Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE - 1));
}
/** Fixed-width encodings; expected arrays use signed Java bytes (e.g. -1 is 0xff). */
@Test
public void testGetBytes() {
  Assert.assertArrayEquals(new byte[] { 0 }, UFIntTool.getBytes(1, 0));
  Assert.assertArrayEquals(new byte[] { 1 }, UFIntTool.getBytes(1, 1));
  Assert.assertArrayEquals(new byte[] { -1 }, UFIntTool.getBytes(1, 255));
  Assert.assertArrayEquals(new byte[] { 1, 0 }, UFIntTool.getBytes(2, 256));
  Assert.assertArrayEquals(new byte[] { 1, 3 }, UFIntTool.getBytes(2, 256 + 3));
  Assert.assertArrayEquals(new byte[] { 1, -128 }, UFIntTool.getBytes(2, 256 + 128));
  Assert.assertArrayEquals(new byte[] { 1, -1 }, UFIntTool.getBytes(2, 256 + 255));
  Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 },
      UFIntTool.getBytes(4, Integer.MAX_VALUE));
  Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 },
      UFIntTool.getBytes(8, Long.MAX_VALUE));
}
/** Inverse of getBytes: decodes fixed-width byte arrays back to the original long. */
@Test
public void testFromBytes() {
  Assert.assertEquals(0, UFIntTool.fromBytes(new byte[] { 0 }));
  Assert.assertEquals(1, UFIntTool.fromBytes(new byte[] { 1 }));
  Assert.assertEquals(255, UFIntTool.fromBytes(new byte[] { -1 }));
  Assert.assertEquals(256, UFIntTool.fromBytes(new byte[] { 1, 0 }));
  Assert.assertEquals(256 + 3, UFIntTool.fromBytes(new byte[] { 1, 3 }));
  Assert.assertEquals(256 + 128, UFIntTool.fromBytes(new byte[] { 1, -128 }));
  Assert.assertEquals(256 + 255, UFIntTool.fromBytes(new byte[] { 1, -1 }));
  Assert.assertEquals(Integer.MAX_VALUE, UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1 }));
  Assert.assertEquals(Long.MAX_VALUE,
      UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }));
}
@Test
public void testRoundTrips() {
  // Encode each value at the full 8-byte width, then decode; the pair of
  // operations must be lossless across all the byte-width boundaries.
  long[] values = new long[] { 0, 1, 2, 255, 256, 31123, 65535, 65536, 65537, 0xfffffeL,
    0xffffffL, 0x1000000L, 0x1000001L, Integer.MAX_VALUE - 1, Integer.MAX_VALUE,
    (long) Integer.MAX_VALUE + 1, Long.MAX_VALUE - 1, Long.MAX_VALUE };
  // Enhanced-for: the original indexed loop never used the index except to
  // fetch the element.
  for (long value : values) {
    Assert.assertEquals(value, UFIntTool.fromBytes(UFIntTool.getBytes(8, value)));
  }
}
@Test
public void testWriteBytes() throws IOException {
  // Same cases as testGetBytes: the stream variant must produce byte-for-byte
  // identical output to the array variant.
  Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(1, 0));
  Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1, 1));
  Assert.assertArrayEquals(new byte[] { (byte) 0xff }, bytesViaOutputStream(1, 255));
  Assert.assertArrayEquals(new byte[] { 1, 0 }, bytesViaOutputStream(2, 0x100));
  Assert.assertArrayEquals(new byte[] { 1, 3 }, bytesViaOutputStream(2, 0x103));
  Assert.assertArrayEquals(new byte[] { 1, (byte) 0x80 }, bytesViaOutputStream(2, 0x180));
  Assert.assertArrayEquals(new byte[] { 1, (byte) 0xff }, bytesViaOutputStream(2, 0x1ff));
  Assert.assertArrayEquals(new byte[] { 0x7f, -1, -1, -1 },
    bytesViaOutputStream(4, Integer.MAX_VALUE));
  Assert.assertArrayEquals(new byte[] { 0x7f, -1, -1, -1, -1, -1, -1, -1 },
    bytesViaOutputStream(8, Long.MAX_VALUE));
}
/**
 * Serializes {@code value} as a fixed-width unsigned int of
 * {@code outputWidth} bytes through the OutputStream API and returns the
 * raw bytes that were written.
 */
private byte[] bytesViaOutputStream(int outputWidth, long value) throws IOException {
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  UFIntTool.writeBytes(outputWidth, value, buffer);
  return buffer.toByteArray();
}
}

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.vint;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Random;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests for UVIntTool's unsigned variable-width int codec. The assertions
 * below show the format: 7 data bits per byte, least-significant group
 * first, with the high bit of each byte set as a continuation flag
 * (e.g. 128 encodes as {0x80, 0x01}).
 */
public class TestVIntTool {

  @Test
  public void testNumBytes() {
    // 0..127 fit in one byte; 128 is the first value needing a continuation
    // byte; Integer.MAX_VALUE has 31 significant bits -> ceil(31/7) = 5 bytes.
    Assert.assertEquals(1, UVIntTool.numBytes(0));
    Assert.assertEquals(1, UVIntTool.numBytes(1));
    Assert.assertEquals(1, UVIntTool.numBytes(100));
    Assert.assertEquals(1, UVIntTool.numBytes(126));
    Assert.assertEquals(1, UVIntTool.numBytes(127));
    Assert.assertEquals(2, UVIntTool.numBytes(128));
    Assert.assertEquals(2, UVIntTool.numBytes(129));
    Assert.assertEquals(5, UVIntTool.numBytes(Integer.MAX_VALUE));
  }

  @Test
  public void testWriteBytes() throws IOException {
    // The stream variant must emit exactly the same encoding as getBytes.
    Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(0));
    Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1));
    Assert.assertArrayEquals(new byte[] { 63 }, bytesViaOutputStream(63));
    Assert.assertArrayEquals(new byte[] { 127 }, bytesViaOutputStream(127));
    Assert.assertArrayEquals(new byte[] { -128, 1 }, bytesViaOutputStream(128));
    Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, bytesViaOutputStream(155));
    Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, bytesViaOutputStream(Integer.MAX_VALUE));
  }

  /** Encodes {@code value} through the OutputStream API and returns the raw bytes. */
  private byte[] bytesViaOutputStream(int value) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    UVIntTool.writeBytes(value, os);
    return os.toByteArray();
  }

  @Test
  public void testToBytes() {
    // -128 is the bit pattern 0x80: continuation flag set, zero data bits.
    Assert.assertArrayEquals(new byte[] { 0 }, UVIntTool.getBytes(0));
    Assert.assertArrayEquals(new byte[] { 1 }, UVIntTool.getBytes(1));
    Assert.assertArrayEquals(new byte[] { 63 }, UVIntTool.getBytes(63));
    Assert.assertArrayEquals(new byte[] { 127 }, UVIntTool.getBytes(127));
    Assert.assertArrayEquals(new byte[] { -128, 1 }, UVIntTool.getBytes(128));
    Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVIntTool.getBytes(155));
    Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, UVIntTool.getBytes(Integer.MAX_VALUE));
  }

  @Test
  public void testFromBytes() {
    Assert.assertEquals(Integer.MAX_VALUE, UVIntTool.getInt(UVIntTool.MAX_VALUE_BYTES));
  }

  @Test
  public void testRoundTrips() {
    // Fixed seed: an unseeded Random made any failure non-reproducible.
    Random random = new Random(4676L);
    for (int i = 0; i < 10000; ++i) {
      int value = random.nextInt(Integer.MAX_VALUE);
      byte[] bytes = UVIntTool.getBytes(value);
      int roundTripped = UVIntTool.getInt(bytes);
      Assert.assertEquals(value, roundTripped);
    }
  }

  @Test
  public void testInputStreams() throws IOException {
    // Decoding straight from a stream, including a two-byte value (155).
    ByteArrayInputStream is;
    is = new ByteArrayInputStream(new byte[] { 0 });
    Assert.assertEquals(0, UVIntTool.getInt(is));
    is = new ByteArrayInputStream(new byte[] { 5 });
    Assert.assertEquals(5, UVIntTool.getInt(is));
    is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
    Assert.assertEquals(155, UVIntTool.getInt(is));
  }
}

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.util.vint;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Random;
import org.apache.hbase.util.number.RandomNumberUtils;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests for UVLongTool's unsigned variable-width long codec. The format
 * mirrors UVIntTool: 7 data bits per byte, least-significant group first,
 * high bit as a continuation flag; Long.MAX_VALUE (63 significant bits)
 * therefore needs ceil(63/7) = 9 bytes.
 */
public class TestVLongTool {

  @Test
  public void testNumBytes() {
    // 0..127 fit in one byte; 128 is the first value needing a second byte.
    Assert.assertEquals(1, UVLongTool.numBytes(0));
    Assert.assertEquals(1, UVLongTool.numBytes(1));
    Assert.assertEquals(1, UVLongTool.numBytes(100));
    Assert.assertEquals(1, UVLongTool.numBytes(126));
    Assert.assertEquals(1, UVLongTool.numBytes(127));
    Assert.assertEquals(2, UVLongTool.numBytes(128));
    Assert.assertEquals(2, UVLongTool.numBytes(129));
    Assert.assertEquals(9, UVLongTool.numBytes(Long.MAX_VALUE));
  }

  @Test
  public void testToBytes() {
    // -128 is the bit pattern 0x80: continuation flag set, zero data bits.
    Assert.assertArrayEquals(new byte[] { 0 }, UVLongTool.getBytes(0));
    Assert.assertArrayEquals(new byte[] { 1 }, UVLongTool.getBytes(1));
    Assert.assertArrayEquals(new byte[] { 63 }, UVLongTool.getBytes(63));
    Assert.assertArrayEquals(new byte[] { 127 }, UVLongTool.getBytes(127));
    Assert.assertArrayEquals(new byte[] { -128, 1 }, UVLongTool.getBytes(128));
    Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVLongTool.getBytes(155));
    Assert.assertArrayEquals(UVLongTool.MAX_VALUE_BYTES, UVLongTool.getBytes(Long.MAX_VALUE));
  }

  @Test
  public void testFromBytes() {
    Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES));
  }

  @Test
  public void testFromBytesOffset() {
    Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES, 0));

    // A realistic timestamp must round-trip, and numBytes must agree with
    // the length of the actual encoding.
    long ms = 1318966363481L;
    byte[] bytes = UVLongTool.getBytes(ms);
    long roundTripped = UVLongTool.getLong(bytes, 0);
    Assert.assertEquals(ms, roundTripped);
    int calculatedNumBytes = UVLongTool.numBytes(ms);
    int actualNumBytes = bytes.length;
    Assert.assertEquals(actualNumBytes, calculatedNumBytes);

    // Decoding must also work from a non-zero offset inside a larger buffer.
    byte[] shiftedBytes = new byte[1000];
    int shift = 33;
    System.arraycopy(bytes, 0, shiftedBytes, shift, bytes.length);
    long shiftedRoundTrip = UVLongTool.getLong(shiftedBytes, shift);
    Assert.assertEquals(ms, shiftedRoundTrip);
  }

  @Test
  public void testRoundTrips() {
    // Fixed seed: an unseeded Random made any failure non-reproducible.
    Random random = new Random(4676L);
    for (int i = 0; i < 10000; ++i) {
      long value = RandomNumberUtils.nextPositiveLong(random);
      byte[] bytes = UVLongTool.getBytes(value);
      long roundTripped = UVLongTool.getLong(bytes);
      Assert.assertEquals(value, roundTripped);
      int calculatedNumBytes = UVLongTool.numBytes(value);
      int actualNumBytes = bytes.length;
      Assert.assertEquals(actualNumBytes, calculatedNumBytes);
    }
  }

  @Test
  public void testInputStreams() throws IOException {
    // Decoding straight from a stream, including a two-byte value (155).
    ByteArrayInputStream is;
    is = new ByteArrayInputStream(new byte[] { 0 });
    Assert.assertEquals(0, UVLongTool.getLong(is));
    is = new ByteArrayInputStream(new byte[] { 5 });
    Assert.assertEquals(5, UVLongTool.getLong(is));
    is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
    Assert.assertEquals(155, UVLongTool.getLong(is));
  }
}

View File

@ -282,6 +282,12 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-prefix-tree</artifactId>
<!-- unfortunately, runtime scope causes eclipse to put it in the compile time classpath -->
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>

View File

@ -112,9 +112,10 @@ import org.hbase.async.Scanner;
public class PerformanceEvaluation extends Configured implements Tool {
protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
private static final int ROW_LENGTH = 1000;
private static final int DEFAULT_ROW_PREFIX_LENGTH = 16;
private static final int VALUE_LENGTH = 1000;
private static final int ONE_GB = 1024 * 1024 * 1000;
private static final int ROWS_PER_GB = ONE_GB / ROW_LENGTH;
private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH;
public static final byte[] COMPRESSION = Bytes.toBytes("NONE");
public static final byte[] TABLE_NAME = Bytes.toBytes("TestTable");
@ -127,6 +128,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
private boolean miniCluster = false;
private boolean nomapred = false;
private int rowPrefixLength = DEFAULT_ROW_PREFIX_LENGTH;
private int N = 1;
private int R = ROWS_PER_GB;
private byte[] tableName = TABLE_NAME;
@ -537,10 +539,11 @@ public class PerformanceEvaluation extends Configured implements Tool {
if (this.presplitRegions == 0)
return new byte [0][];
byte[][] splits = new byte[this.presplitRegions][];
int numSplitPoints = presplitRegions - 1;
byte[][] splits = new byte[numSplitPoints][];
int jump = this.R / this.presplitRegions;
for (int i=0; i <this.presplitRegions; i++) {
int rowkey = jump * i;
for (int i=0; i < numSplitPoints; i++) {
int rowkey = jump * (1 + i);
splits[i] = format(rowkey);
}
return splits;
@ -931,9 +934,9 @@ public class PerformanceEvaluation extends Configured implements Tool {
if (row.size() != 1) {
throw new IOException((row.isEmpty() ? "No" : "Multiple (" + row.size() + ')')
+ " KeyValue found in row");
} else if (row.get(0).value().length != ROW_LENGTH) {
} else if (row.get(0).value().length != VALUE_LENGTH) {
throw new IOException("Invalid value length (found: " + row.get(0).value().length
+ ", expected: " + ROW_LENGTH + ") in row \""
+ ", expected: " + VALUE_LENGTH + ") in row \""
+ new String(row.get(0).key()) + '"');
}
}
@ -1420,7 +1423,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
* number (Does absolute in case number is negative).
*/
public static byte [] format(final int number) {
byte [] b = new byte[10];
byte [] b = new byte[DEFAULT_ROW_PREFIX_LENGTH + 10];
int d = Math.abs(number);
for (int i = b.length - 1; i >= 0; i--) {
b[i] = (byte)((d % 10) + '0');
@ -1436,10 +1439,10 @@ public class PerformanceEvaluation extends Configured implements Tool {
* @return Generated random value to insert into a table cell.
*/
public static byte[] generateValue(final Random r) {
byte [] b = new byte [ROW_LENGTH];
byte [] b = new byte [VALUE_LENGTH];
int i = 0;
for(i = 0; i < (ROW_LENGTH-8); i += 8) {
for(i = 0; i < (VALUE_LENGTH-8); i += 8) {
b[i] = (byte) (65 + r.nextInt(26));
b[i+1] = b[i];
b[i+2] = b[i];
@ -1451,7 +1454,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
}
byte a = (byte) (65 + r.nextInt(26));
for(; i < ROW_LENGTH; i++) {
for(; i < VALUE_LENGTH; i++) {
b[i] = a;
}
return b;

View File

@ -105,15 +105,15 @@ public class TestEncodedSeekers {
//write the data, but leave some in the memstore
doPuts(region);
//verify correctness when memstore contains data
doGets(region);
//verify correctness again after compacting
region.compactStores();
doGets(region);
Map<DataBlockEncoding, Integer> encodingCounts = cache.getEncodingCountsForTest();
// Ensure that compactions don't pollute the cache with unencoded blocks
@ -124,8 +124,8 @@ public class TestEncodedSeekers {
assertEquals(encoding, encodingInCache);
assertTrue(encodingCounts.get(encodingInCache) > 0);
}
private void doPuts(HRegion region) throws IOException{
LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE);
for (int i = 0; i < NUM_ROWS; ++i) {
@ -146,8 +146,8 @@ public class TestEncodedSeekers {
}
}
}
private void doGets(HRegion region) throws IOException{
for (int i = 0; i < NUM_ROWS; ++i) {
final byte[] rowKey = LoadTestKVGenerator.md5PrefixedKey(i).getBytes();

11
pom.xml
View File

@ -42,7 +42,7 @@
<version>0.95-SNAPSHOT</version>
<name>HBase</name>
<description>
Apache HBase&#153; is the &amp;lt;a href="http://hadoop.apache.org"&amp;rt;Hadoop&lt;/a&amp;rt; database. Use it when you need
Apache HBase™ is the &amp;lt;a href="http://hadoop.apache.org"&amp;rt;Hadoop&lt;/a&amp;rt; database. Use it when you need
random, realtime read/write access to your Big Data.
This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters
of commodity hardware.
@ -56,6 +56,7 @@
<module>hbase-common</module>
<module>hbase-it</module>
<module>hbase-examples</module>
<module>hbase-prefix-tree</module>
</modules>
<scm>
<connection>scm:svn:http://svn.apache.org/repos/asf/hbase/trunk</connection>
@ -978,6 +979,14 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-prefix-tree</artifactId>
<version>${project.version}</version>
<!-- unfortunately, runtime scope causes Eclipse to give compile time access which isn't
needed, however it is apparently needed to run things within Eclipse -->
<scope>runtime</scope>
</dependency>
<dependency>
<artifactId>hbase-examples</artifactId>
<groupId>org.apache.hbase</groupId>