HBASE-4676 Prefix Compression - Trie data block encoding (Matt Corgan)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1443289 13f79535-47bb-0310-9956-ffa450edef68
parent f17165ae08
commit b198a50434
@ -38,7 +38,10 @@ public enum DataBlockEncoding {
  // id 1 is reserved for the BITSET algorithm to be added later
  PREFIX(2, createEncoder("org.apache.hadoop.hbase.io.encoding.PrefixKeyDeltaEncoder")),
  DIFF(3, createEncoder("org.apache.hadoop.hbase.io.encoding.DiffKeyDeltaEncoder")),
  FAST_DIFF(4, createEncoder("org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder"));
  FAST_DIFF(4, createEncoder("org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder")),
  // id 5 is reserved for the COPY_KEY algorithm for benchmarking
  // COPY_KEY(5, createEncoder("org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder")),
  PREFIX_TREE(6, createEncoder("org.apache.hbase.codec.prefixtree.PrefixTreeCodec"));

  private final short id;
  private final byte[] idInBytes;
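Each enum entry pairs a persisted id with an encoder instantiated by reflection, and the new PREFIX_TREE entry takes id 6 and points at the PrefixTreeCodec class added later in this patch. As a rough usage sketch (the HTableDescriptor/HColumnDescriptor calls are the standard client API of this HBase line, not part of this change), a table can opt a column family into the new encoding like this:

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;

public class PrefixTreeTableExample {
  public static HTableDescriptor prefixTreeTable() {
    HTableDescriptor table = new HTableDescriptor("prefix_tree_demo");
    HColumnDescriptor family = new HColumnDescriptor("cf");
    // blocks for this family will be written with the PREFIX_TREE id (6) declared above
    family.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
    table.addFamily(family);
    return table;
  }
}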
@ -18,6 +18,8 @@
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;

@ -50,4 +52,14 @@ public class ByteRangeTool {
    return ranges;
  }

  public static void write(OutputStream os, ByteRange byteRange) throws IOException {
    os.write(byteRange.getBytes(), byteRange.getOffset(), byteRange.getLength());
  }

  public static void write(OutputStream os, ByteRange byteRange, int byteRangeInnerOffset)
      throws IOException {
    os.write(byteRange.getBytes(), byteRange.getOffset() + byteRangeInnerOffset,
        byteRange.getLength() - byteRangeInnerOffset);
  }

}
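The two new write overloads stream a ByteRange, or its tail starting at an inner offset, onto an OutputStream without an intermediate copy. A small sketch of how they might be used (the ByteRange constructor taking a byte[] is an assumption; only the write methods come from this patch):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeTool;
import org.apache.hadoop.hbase.util.Bytes;

public class ByteRangeWriteExample {
  // copies the tail of the range, starting innerOffset bytes into it, to a new byte[]
  public static byte[] tail(ByteRange range, int innerOffset) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ByteRangeTool.write(out, range, innerOffset);
    return out.toByteArray();
  }

  public static void main(String[] args) throws IOException {
    ByteRange range = new ByteRange(Bytes.toBytes("rowkey-000123"));  // assumed constructor
    System.out.println(Bytes.toString(tail(range, 7)));  // prints 000123
  }
}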
@ -27,8 +27,10 @@ import java.nio.ByteBuffer;
|
|||
import java.nio.ByteOrder;
|
||||
import java.security.AccessController;
|
||||
import java.security.PrivilegedAction;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -43,6 +45,7 @@ import org.apache.hadoop.io.WritableUtils;
|
|||
import sun.misc.Unsafe;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Utility class that handles byte arrays, conversions to/from other types,
|
||||
|
@ -1718,4 +1721,44 @@ public class Bytes {
    return out;
  }

  public static boolean equals(List<byte[]> a, List<byte[]> b) {
    if (a == null) {
      if (b == null) {
        return true;
      }
      return false;
    }
    if (b == null) {
      return false;
    }
    if (a.size() != b.size()) {
      return false;
    }
    for (int i = 0; i < a.size(); ++i) {
      if (!Bytes.equals(a.get(i), b.get(i))) {
        return false;
      }
    }
    return true;
  }

  public static boolean isSorted(Collection<byte[]> arrays) {
    byte[] previous = new byte[0];
    for (byte[] array : IterableUtils.nullSafe(arrays)) {
      if (Bytes.compareTo(previous, array) > 0) {
        return false;
      }
      previous = array;
    }
    return true;
  }

  public static List<byte[]> getUtf8ByteArrays(List<String> strings) {
    List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(strings));
    for (String s : IterableUtils.nullSafe(strings)) {
      byteArrays.add(Bytes.toBytes(s));
    }
    return byteArrays;
  }

}
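The new helpers compare lists of byte[] element-wise, check that a collection is sorted in unsigned lexicographic order, and convert strings to UTF-8 byte arrays. A short sketch of how they compose (the sample values are arbitrary):

import java.util.List;

import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Lists;

public class BytesListExample {
  public static void main(String[] args) {
    List<String> qualifiers = Lists.newArrayList("a", "ab", "b");

    // convert to UTF-8 byte[]s, preserving order
    List<byte[]> encoded = Bytes.getUtf8ByteArrays(qualifiers);

    // unsigned lexicographic byte order matches the string order here
    System.out.println(Bytes.isSorted(encoded));  // true

    // element-wise comparison of two lists of byte[]
    System.out.println(Bytes.equals(encoded, Bytes.getUtf8ByteArrays(qualifiers)));  // true
  }
}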
@ -28,12 +28,15 @@ import org.apache.hadoop.hbase.KeyValue;
|
|||
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||
import org.apache.hadoop.io.WritableUtils;
|
||||
|
||||
import com.google.common.primitives.Bytes;
|
||||
|
||||
/**
 * Generates lists of KeyValues that are useful for testing data block encoding
 * and compression.
 */
public class RedundantKVGenerator {
|
||||
// row settings
|
||||
static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
|
||||
static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
|
||||
static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
|
||||
static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
|
||||
|
@ -107,6 +110,7 @@ public class RedundantKVGenerator {
|
|||
) {
|
||||
this.randomizer = randomizer;
|
||||
|
||||
this.commonPrefix = DEFAULT_COMMON_PREFIX;
|
||||
this.numberOfRowPrefixes = numberOfRowPrefixes;
|
||||
this.averagePrefixLength = averagePrefixLength;
|
||||
this.prefixLengthVariance = prefixLengthVariance;
|
||||
|
@ -115,7 +119,7 @@ public class RedundantKVGenerator {
|
|||
this.numberOfRows = numberOfRows;
|
||||
|
||||
this.chanceForSameQualifier = chanceForSameQualifier;
|
||||
this.chanceForSimiliarQualifier = chanceForSimiliarQualifier;
|
||||
this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
|
||||
this.averageQualifierLength = averageQualifierLength;
|
||||
this.qualifierLengthVariance = qualifierLengthVariance;
|
||||
|
||||
|
@ -131,6 +135,7 @@ public class RedundantKVGenerator {
|
|||
private Random randomizer;
|
||||
|
||||
// row settings
|
||||
private byte[] commonPrefix;//global prefix before rowPrefixes
|
||||
private int numberOfRowPrefixes;
|
||||
private int averagePrefixLength = 6;
|
||||
private int prefixLengthVariance = 3;
|
||||
|
@ -138,9 +143,12 @@ public class RedundantKVGenerator {
|
|||
private int suffixLengthVariance = 3;
|
||||
private int numberOfRows = 500;
|
||||
|
||||
//family
|
||||
private byte[] family;
|
||||
|
||||
// qualifier
|
||||
private float chanceForSameQualifier = 0.5f;
|
||||
private float chanceForSimiliarQualifier = 0.4f;
|
||||
private float chanceForSimilarQualifier = 0.4f;
|
||||
private int averageQualifierLength = 9;
|
||||
private int qualifierLengthVariance = 3;
|
||||
|
||||
|
@ -161,7 +169,8 @@ public class RedundantKVGenerator {
|
|||
prefixLengthVariance;
|
||||
byte[] newPrefix = new byte[prefixLength];
|
||||
randomizer.nextBytes(newPrefix);
|
||||
prefixes.add(newPrefix);
|
||||
byte[] newPrefixWithCommon = newPrefix;
|
||||
prefixes.add(newPrefixWithCommon);
|
||||
}
|
||||
|
||||
// generate rest of the row
|
||||
|
@ -173,7 +182,8 @@ public class RedundantKVGenerator {
|
|||
int randomPrefix = randomizer.nextInt(prefixes.size());
|
||||
byte[] row = new byte[prefixes.get(randomPrefix).length +
|
||||
suffixLength];
|
||||
rows.add(row);
|
||||
byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
|
||||
rows.add(rowWithCommonPrefix);
|
||||
}
|
||||
|
||||
return rows;
|
||||
|
@ -188,20 +198,22 @@ public class RedundantKVGenerator {
|
|||
List<KeyValue> result = new ArrayList<KeyValue>();
|
||||
|
||||
List<byte[]> rows = generateRows();
|
||||
Map<Integer, List<byte[]>> rowsToQualifier =
|
||||
new HashMap<Integer, List<byte[]>>();
|
||||
Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>();
|
||||
|
||||
byte[] family = new byte[columnFamilyLength];
|
||||
randomizer.nextBytes(family);
|
||||
if(family==null){
|
||||
family = new byte[columnFamilyLength];
|
||||
randomizer.nextBytes(family);
|
||||
}
|
||||
|
||||
long baseTimestamp = Math.abs(randomizer.nextLong()) /
|
||||
baseTimestampDivide;
|
||||
long baseTimestamp = Math.abs(randomizer.nextLong()) / baseTimestampDivide;
|
||||
|
||||
byte[] value = new byte[valueLength];
|
||||
|
||||
for (int i = 0; i < howMany; ++i) {
|
||||
long timestamp = baseTimestamp + randomizer.nextInt(
|
||||
timestampDiffSize);
|
||||
long timestamp = baseTimestamp;
|
||||
if(timestampDiffSize > 0){
|
||||
timestamp += randomizer.nextInt(timestampDiffSize);
|
||||
}
|
||||
Integer rowId = randomizer.nextInt(rows.size());
|
||||
byte[] row = rows.get(rowId);
|
||||
|
||||
|
@ -209,13 +221,11 @@ public class RedundantKVGenerator {
|
|||
// occasionally completely different
|
||||
byte[] qualifier;
|
||||
float qualifierChance = randomizer.nextFloat();
|
||||
if (!rowsToQualifier.containsKey(rowId) ||
|
||||
qualifierChance > chanceForSameQualifier +
|
||||
chanceForSimiliarQualifier) {
|
||||
if (!rowsToQualifier.containsKey(rowId)
|
||||
|| qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
|
||||
int qualifierLength = averageQualifierLength;
|
||||
qualifierLength +=
|
||||
randomizer.nextInt(2 * qualifierLengthVariance + 1) -
|
||||
qualifierLengthVariance;
|
||||
qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
|
||||
- qualifierLengthVariance;
|
||||
qualifier = new byte[qualifierLength];
|
||||
randomizer.nextBytes(qualifier);
|
||||
|
||||
|
@ -227,8 +237,8 @@ public class RedundantKVGenerator {
|
|||
} else if (qualifierChance > chanceForSameQualifier) {
|
||||
// similar qualifier
|
||||
List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
|
||||
byte[] originalQualifier = previousQualifiers.get(
|
||||
randomizer.nextInt(previousQualifiers.size()));
|
||||
byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
|
||||
.size()));
|
||||
|
||||
qualifier = new byte[originalQualifier.length];
|
||||
int commonPrefix = randomizer.nextInt(qualifier.length);
|
||||
|
@ -241,8 +251,7 @@ public class RedundantKVGenerator {
|
|||
} else {
|
||||
// same qualifier
|
||||
List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
|
||||
qualifier = previousQualifiers.get(
|
||||
randomizer.nextInt(previousQualifiers.size()));
|
||||
qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
|
||||
}
|
||||
|
||||
if (randomizer.nextFloat() < chanceForZeroValue) {
|
||||
|
@ -286,5 +295,99 @@ public class RedundantKVGenerator {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/************************ get/set ***********************************/
|
||||
|
||||
public RedundantKVGenerator setCommonPrefix(byte[] prefix){
|
||||
this.commonPrefix = prefix;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setRandomizer(Random randomizer) {
|
||||
this.randomizer = randomizer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) {
|
||||
this.numberOfRowPrefixes = numberOfRowPrefixes;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) {
|
||||
this.averagePrefixLength = averagePrefixLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) {
|
||||
this.prefixLengthVariance = prefixLengthVariance;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) {
|
||||
this.averageSuffixLength = averageSuffixLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) {
|
||||
this.suffixLengthVariance = suffixLengthVariance;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setNumberOfRows(int numberOfRows) {
|
||||
this.numberOfRows = numberOfRows;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) {
|
||||
this.chanceForSameQualifier = chanceForSameQualifier;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) {
|
||||
this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) {
|
||||
this.averageQualifierLength = averageQualifierLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) {
|
||||
this.qualifierLengthVariance = qualifierLengthVariance;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) {
|
||||
this.columnFamilyLength = columnFamilyLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setFamily(byte[] family) {
|
||||
this.family = family;
|
||||
this.columnFamilyLength = family.length;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setValueLength(int valueLength) {
|
||||
this.valueLength = valueLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) {
|
||||
this.chanceForZeroValue = chanceForZeroValue;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) {
|
||||
this.baseTimestampDivide = baseTimestampDivide;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) {
|
||||
this.timestampDiffSize = timestampDiffSize;
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
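With the new fluent setters, a test can configure the generator inline instead of supplying the long constructor's full argument list. A minimal sketch (the import path and the no-argument constructor are assumptions; the setters and generateTestKeyValues come from the class itself):

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;  // import path assumed

public class GeneratorExample {
  public static List<KeyValue> smallRedundantBatch() {
    // chain the new setters instead of passing every constructor argument
    RedundantKVGenerator generator = new RedundantKVGenerator()  // assumed no-arg constructor
        .setNumberOfRowPrefixes(4)
        .setAveragePrefixLength(5)
        .setNumberOfRows(50)
        .setChanceForSameQualifier(0.6f)
        .setValueLength(16);
    return generator.generateTestKeyValues(200);
  }
}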
|
@ -79,22 +79,39 @@ public class CellComparator implements Comparator<Cell>, Serializable{
|
|||
/**************** equals ****************************/
|
||||
|
||||
public static boolean equals(Cell a, Cell b){
|
||||
if (!areKeyLengthsEqual(a, b)) {
|
||||
return false;
|
||||
}
|
||||
//TODO compare byte[]'s in reverse since later bytes more likely to differ
|
||||
return 0 == compareStatic(a, b);
|
||||
return equalsRow(a, b)
|
||||
&& equalsFamily(a, b)
|
||||
&& equalsQualifier(a, b)
|
||||
&& equalsTimestamp(a, b)
|
||||
&& equalsType(a, b);
|
||||
}
|
||||
|
||||
public static boolean equalsRow(Cell a, Cell b){
|
||||
if(!areRowLengthsEqual(a, b)){
|
||||
return false;
|
||||
}
|
||||
return 0 == Bytes.compareTo(
|
||||
return Bytes.equals(
|
||||
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
|
||||
b.getRowArray(), b.getRowOffset(), b.getRowLength());
|
||||
}
|
||||
|
||||
public static boolean equalsFamily(Cell a, Cell b){
|
||||
return Bytes.equals(
|
||||
a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
|
||||
b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
|
||||
}
|
||||
|
||||
public static boolean equalsQualifier(Cell a, Cell b){
|
||||
return Bytes.equals(
|
||||
a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
|
||||
b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
|
||||
}
|
||||
|
||||
public static boolean equalsTimestamp(Cell a, Cell b){
|
||||
return a.getTimestamp() == b.getTimestamp();
|
||||
}
|
||||
|
||||
public static boolean equalsType(Cell a, Cell b){
|
||||
return a.getTypeByte() == b.getTypeByte();
|
||||
}
|
||||
|
||||
|
||||
/********************* hashCode ************************/
|
||||
|
||||
|
|
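Splitting equals into per-field checks lets callers test only the parts of a Cell they care about. A brief sketch (it assumes KeyValue implements the new Cell interface on this trunk, and the CellComparator package shown is a guess):

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hbase.CellComparator;  // package assumed

public class CellEqualityExample {
  public static void main(String[] args) {
    KeyValue a = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
        Bytes.toBytes("q1"), 100L, Bytes.toBytes("v1"));
    KeyValue b = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
        Bytes.toBytes("q2"), 100L, Bytes.toBytes("v2"));

    // same row, family and timestamp, but different qualifier
    System.out.println(CellComparator.equalsRow(a, b));        // true
    System.out.println(CellComparator.equalsFamily(a, b));     // true
    System.out.println(CellComparator.equalsQualifier(a, b));  // false
    System.out.println(CellComparator.equals(a, b));           // false
  }
}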
|
@ -18,6 +18,8 @@
package org.apache.hbase.cell;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hbase.Cell;

@ -45,6 +47,6 @@ public interface CellOutputStream {
   * that can then be read from the implementation to be sent to disk, put in the block cache, or
   * sent over the network.
   */
  void flush();
  void flush() throws IOException;

}
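Since flush can now fail, callers and implementations must propagate the checked IOException. A sketch of a caller (the write(Cell) method on the interface is assumed; only the flush signature change comes from this hunk):

import java.io.IOException;

import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellOutputStream;

public class CellFlushExample {
  // callers now have to surface or handle the checked IOException from flush()
  public static void writeAndFlush(CellOutputStream out, Iterable<Cell> cells) throws IOException {
    for (Cell cell : cells) {
      out.write(cell);  // assumed: the interface also exposes a write(Cell) method
    }
    out.flush();        // may now fail, e.g. on a disk or socket error
  }
}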
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<!--
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
-->
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>hbase</artifactId>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<version>0.95-SNAPSHOT</version>
|
||||
<relativePath>..</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>hbase-prefix-tree</artifactId>
|
||||
<name>HBase - Prefix Tree</name>
|
||||
<description>Prefix Tree Data Block Encoder</description>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<!-- Always skip the second part executions, since we only run
|
||||
simple unit tests in this module. -->
|
||||
<executions>
|
||||
<execution>
|
||||
<id>secondPartTestsExecution</id>
|
||||
<phase>test</phase>
|
||||
<goals>
|
||||
<goal>test</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-common</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
|
@ -0,0 +1,841 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
|
||||
import org.apache.hbase.util.vint.UVIntTool;
|
||||
import org.apache.hbase.util.vint.UVLongTool;
|
||||
|
||||
/**
 * Information about the block. Stored at the beginning of the byte[]. Contains things
 * like the minimum timestamp and the width of FInts in the row tree.
 *
 * Most fields are stored as VInts that get decoded on the first access of each new block.
 */
@InterfaceAudience.Private
public class PrefixTreeBlockMeta {
|
||||
|
||||
/******************* static fields ********************/
|
||||
|
||||
public static final int VERSION = 0;
|
||||
|
||||
public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue
|
||||
|
||||
public static final int
|
||||
NUM_LONGS = 2,
|
||||
NUM_INTS = 22,
|
||||
NUM_SHORTS = 0,//keyValueTypeWidth not persisted
|
||||
NUM_SINGLE_BYTES = 2,
|
||||
MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS
|
||||
+ Bytes.SIZEOF_SHORT * NUM_SHORTS
|
||||
+ Bytes.SIZEOF_INT * NUM_INTS
|
||||
+ NUM_SINGLE_BYTES;
|
||||
|
||||
|
||||
/**************** transient fields *********************/
|
||||
|
||||
protected int arrayOffset;
|
||||
protected int bufferOffset;
|
||||
|
||||
|
||||
/**************** persisted fields **********************/
|
||||
|
||||
// PrefixTree version to allow future format modifications
|
||||
protected int version;
|
||||
protected int numMetaBytes;
|
||||
protected int numKeyValueBytes;
|
||||
protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte
|
||||
|
||||
// split the byte[] into 6 sections for the different data types
|
||||
protected int numRowBytes;
|
||||
protected int numFamilyBytes;
|
||||
protected int numQualifierBytes;
|
||||
protected int numTimestampBytes;
|
||||
protected int numMvccVersionBytes;
|
||||
protected int numValueBytes;
|
||||
|
||||
// number of bytes in each section of fixed width FInts
|
||||
protected int nextNodeOffsetWidth;
|
||||
protected int familyOffsetWidth;
|
||||
protected int qualifierOffsetWidth;
|
||||
protected int timestampIndexWidth;
|
||||
protected int mvccVersionIndexWidth;
|
||||
protected int valueOffsetWidth;
|
||||
protected int valueLengthWidth;
|
||||
|
||||
// used to pre-allocate structures for reading
|
||||
protected int rowTreeDepth;
|
||||
protected int maxRowLength;
|
||||
protected int maxQualifierLength;
|
||||
|
||||
// the timestamp from which the deltas are calculated
|
||||
protected long minTimestamp;
|
||||
protected int timestampDeltaWidth;
|
||||
protected long minMvccVersion;
|
||||
protected int mvccVersionDeltaWidth;
|
||||
|
||||
protected boolean allSameType;
|
||||
protected byte allTypes;
|
||||
|
||||
protected int numUniqueRows;
|
||||
protected int numUniqueFamilies;
|
||||
protected int numUniqueQualifiers;
|
||||
|
||||
|
||||
/***************** constructors ********************/
|
||||
|
||||
public PrefixTreeBlockMeta() {
|
||||
}
|
||||
|
||||
public PrefixTreeBlockMeta(InputStream is) throws IOException{
|
||||
this.version = VERSION;
|
||||
this.arrayOffset = 0;
|
||||
this.bufferOffset = 0;
|
||||
readVariableBytesFromInputStream(is);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param buffer positioned at start of PtBlockMeta
|
||||
*/
|
||||
public PrefixTreeBlockMeta(ByteBuffer buffer) {
|
||||
initOnBlock(buffer);
|
||||
}
|
||||
|
||||
public void initOnBlock(ByteBuffer buffer) {
|
||||
arrayOffset = buffer.arrayOffset();
|
||||
bufferOffset = buffer.position();
|
||||
readVariableBytesFromArray(buffer.array(), arrayOffset + bufferOffset);
|
||||
}
|
||||
|
||||
|
||||
/**************** operate on each field **********************/
|
||||
|
||||
public int calculateNumMetaBytes(){
|
||||
int numBytes = 0;
|
||||
numBytes += UVIntTool.numBytes(version);
|
||||
numBytes += UVLongTool.numBytes(numMetaBytes);
|
||||
numBytes += UVIntTool.numBytes(numKeyValueBytes);
|
||||
++numBytes;//os.write(getIncludesMvccVersion());
|
||||
|
||||
numBytes += UVIntTool.numBytes(numRowBytes);
|
||||
numBytes += UVIntTool.numBytes(numFamilyBytes);
|
||||
numBytes += UVIntTool.numBytes(numQualifierBytes);
|
||||
numBytes += UVIntTool.numBytes(numTimestampBytes);
|
||||
numBytes += UVIntTool.numBytes(numMvccVersionBytes);
|
||||
numBytes += UVIntTool.numBytes(numValueBytes);
|
||||
|
||||
numBytes += UVIntTool.numBytes(nextNodeOffsetWidth);
|
||||
numBytes += UVIntTool.numBytes(familyOffsetWidth);
|
||||
numBytes += UVIntTool.numBytes(qualifierOffsetWidth);
|
||||
numBytes += UVIntTool.numBytes(timestampIndexWidth);
|
||||
numBytes += UVIntTool.numBytes(mvccVersionIndexWidth);
|
||||
numBytes += UVIntTool.numBytes(valueOffsetWidth);
|
||||
numBytes += UVIntTool.numBytes(valueLengthWidth);
|
||||
|
||||
numBytes += UVIntTool.numBytes(rowTreeDepth);
|
||||
numBytes += UVIntTool.numBytes(maxRowLength);
|
||||
numBytes += UVIntTool.numBytes(maxQualifierLength);
|
||||
|
||||
numBytes += UVLongTool.numBytes(minTimestamp);
|
||||
numBytes += UVIntTool.numBytes(timestampDeltaWidth);
|
||||
numBytes += UVLongTool.numBytes(minMvccVersion);
|
||||
numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth);
|
||||
++numBytes;//os.write(getAllSameTypeByte());
|
||||
++numBytes;//os.write(allTypes);
|
||||
|
||||
numBytes += UVIntTool.numBytes(numUniqueRows);
|
||||
numBytes += UVIntTool.numBytes(numUniqueFamilies);
|
||||
numBytes += UVIntTool.numBytes(numUniqueQualifiers);
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{
|
||||
UVIntTool.writeBytes(version, os);
|
||||
UVIntTool.writeBytes(numMetaBytes, os);
|
||||
UVIntTool.writeBytes(numKeyValueBytes, os);
|
||||
os.write(getIncludesMvccVersionByte());
|
||||
|
||||
UVIntTool.writeBytes(numRowBytes, os);
|
||||
UVIntTool.writeBytes(numFamilyBytes, os);
|
||||
UVIntTool.writeBytes(numQualifierBytes, os);
|
||||
UVIntTool.writeBytes(numTimestampBytes, os);
|
||||
UVIntTool.writeBytes(numMvccVersionBytes, os);
|
||||
UVIntTool.writeBytes(numValueBytes, os);
|
||||
|
||||
UVIntTool.writeBytes(nextNodeOffsetWidth, os);
|
||||
UVIntTool.writeBytes(familyOffsetWidth, os);
|
||||
UVIntTool.writeBytes(qualifierOffsetWidth, os);
|
||||
UVIntTool.writeBytes(timestampIndexWidth, os);
|
||||
UVIntTool.writeBytes(mvccVersionIndexWidth, os);
|
||||
UVIntTool.writeBytes(valueOffsetWidth, os);
|
||||
UVIntTool.writeBytes(valueLengthWidth, os);
|
||||
|
||||
UVIntTool.writeBytes(rowTreeDepth, os);
|
||||
UVIntTool.writeBytes(maxRowLength, os);
|
||||
UVIntTool.writeBytes(maxQualifierLength, os);
|
||||
|
||||
UVLongTool.writeBytes(minTimestamp, os);
|
||||
UVIntTool.writeBytes(timestampDeltaWidth, os);
|
||||
UVLongTool.writeBytes(minMvccVersion, os);
|
||||
UVIntTool.writeBytes(mvccVersionDeltaWidth, os);
|
||||
os.write(getAllSameTypeByte());
|
||||
os.write(allTypes);
|
||||
|
||||
UVIntTool.writeBytes(numUniqueRows, os);
|
||||
UVIntTool.writeBytes(numUniqueFamilies, os);
|
||||
UVIntTool.writeBytes(numUniqueQualifiers, os);
|
||||
}
|
||||
|
||||
public void readVariableBytesFromInputStream(InputStream is) throws IOException{
|
||||
version = UVIntTool.getInt(is);
|
||||
numMetaBytes = UVIntTool.getInt(is);
|
||||
numKeyValueBytes = UVIntTool.getInt(is);
|
||||
setIncludesMvccVersion((byte) is.read());
|
||||
|
||||
numRowBytes = UVIntTool.getInt(is);
|
||||
numFamilyBytes = UVIntTool.getInt(is);
|
||||
numQualifierBytes = UVIntTool.getInt(is);
|
||||
numTimestampBytes = UVIntTool.getInt(is);
|
||||
numMvccVersionBytes = UVIntTool.getInt(is);
|
||||
numValueBytes = UVIntTool.getInt(is);
|
||||
|
||||
nextNodeOffsetWidth = UVIntTool.getInt(is);
|
||||
familyOffsetWidth = UVIntTool.getInt(is);
|
||||
qualifierOffsetWidth = UVIntTool.getInt(is);
|
||||
timestampIndexWidth = UVIntTool.getInt(is);
|
||||
mvccVersionIndexWidth = UVIntTool.getInt(is);
|
||||
valueOffsetWidth = UVIntTool.getInt(is);
|
||||
valueLengthWidth = UVIntTool.getInt(is);
|
||||
|
||||
rowTreeDepth = UVIntTool.getInt(is);
|
||||
maxRowLength = UVIntTool.getInt(is);
|
||||
maxQualifierLength = UVIntTool.getInt(is);
|
||||
|
||||
minTimestamp = UVLongTool.getLong(is);
|
||||
timestampDeltaWidth = UVIntTool.getInt(is);
|
||||
minMvccVersion = UVLongTool.getLong(is);
|
||||
mvccVersionDeltaWidth = UVIntTool.getInt(is);
|
||||
|
||||
setAllSameType((byte) is.read());
|
||||
allTypes = (byte) is.read();
|
||||
|
||||
numUniqueRows = UVIntTool.getInt(is);
|
||||
numUniqueFamilies = UVIntTool.getInt(is);
|
||||
numUniqueQualifiers = UVIntTool.getInt(is);
|
||||
}
|
||||
|
||||
public void readVariableBytesFromArray(byte[] bytes, int offset) {
|
||||
int position = offset;
|
||||
|
||||
version = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(version);
|
||||
numMetaBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numMetaBytes);
|
||||
numKeyValueBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numKeyValueBytes);
|
||||
setIncludesMvccVersion(bytes[position]);
|
||||
++position;
|
||||
|
||||
numRowBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numRowBytes);
|
||||
numFamilyBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numFamilyBytes);
|
||||
numQualifierBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numQualifierBytes);
|
||||
numTimestampBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numTimestampBytes);
|
||||
numMvccVersionBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numMvccVersionBytes);
|
||||
numValueBytes = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numValueBytes);
|
||||
|
||||
nextNodeOffsetWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(nextNodeOffsetWidth);
|
||||
familyOffsetWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(familyOffsetWidth);
|
||||
qualifierOffsetWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(qualifierOffsetWidth);
|
||||
timestampIndexWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(timestampIndexWidth);
|
||||
mvccVersionIndexWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(mvccVersionIndexWidth);
|
||||
valueOffsetWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(valueOffsetWidth);
|
||||
valueLengthWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(valueLengthWidth);
|
||||
|
||||
rowTreeDepth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(rowTreeDepth);
|
||||
maxRowLength = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(maxRowLength);
|
||||
maxQualifierLength = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(maxQualifierLength);
|
||||
|
||||
minTimestamp = UVLongTool.getLong(bytes, position);
|
||||
position += UVLongTool.numBytes(minTimestamp);
|
||||
timestampDeltaWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(timestampDeltaWidth);
|
||||
minMvccVersion = UVLongTool.getLong(bytes, position);
|
||||
position += UVLongTool.numBytes(minMvccVersion);
|
||||
mvccVersionDeltaWidth = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(mvccVersionDeltaWidth);
|
||||
|
||||
setAllSameType(bytes[position]);
|
||||
++position;
|
||||
allTypes = bytes[position];
|
||||
++position;
|
||||
|
||||
numUniqueRows = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numUniqueRows);
|
||||
numUniqueFamilies = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numUniqueFamilies);
|
||||
numUniqueQualifiers = UVIntTool.getInt(bytes, position);
|
||||
position += UVIntTool.numBytes(numUniqueQualifiers);
|
||||
}
|
||||
|
||||
//TODO method that can read directly from ByteBuffer instead of InputStream
|
||||
|
||||
|
||||
/*************** methods *************************/
|
||||
|
||||
public int getKeyValueTypeWidth() {
|
||||
return allSameType ? 0 : 1;
|
||||
}
|
||||
|
||||
public byte getIncludesMvccVersionByte() {
|
||||
return includesMvccVersion ? (byte) 1 : (byte) 0;
|
||||
}
|
||||
|
||||
public void setIncludesMvccVersion(byte includesMvccVersionByte) {
|
||||
includesMvccVersion = includesMvccVersionByte != 0;
|
||||
}
|
||||
|
||||
public byte getAllSameTypeByte() {
|
||||
return allSameType ? (byte) 1 : (byte) 0;
|
||||
}
|
||||
|
||||
public void setAllSameType(byte allSameTypeByte) {
|
||||
allSameType = allSameTypeByte != 0;
|
||||
}
|
||||
|
||||
public boolean isAllSameTimestamp() {
|
||||
return timestampIndexWidth == 0;
|
||||
}
|
||||
|
||||
public boolean isAllSameMvccVersion() {
|
||||
return mvccVersionIndexWidth == 0;
|
||||
}
|
||||
|
||||
public void setTimestampFields(LongEncoder encoder){
|
||||
this.minTimestamp = encoder.getMin();
|
||||
this.timestampIndexWidth = encoder.getBytesPerIndex();
|
||||
this.timestampDeltaWidth = encoder.getBytesPerDelta();
|
||||
this.numTimestampBytes = encoder.getTotalCompressedBytes();
|
||||
}
|
||||
|
||||
public void setMvccVersionFields(LongEncoder encoder){
|
||||
this.minMvccVersion = encoder.getMin();
|
||||
this.mvccVersionIndexWidth = encoder.getBytesPerIndex();
|
||||
this.mvccVersionDeltaWidth = encoder.getBytesPerDelta();
|
||||
this.numMvccVersionBytes = encoder.getTotalCompressedBytes();
|
||||
}
|
||||
|
||||
|
||||
/*************** Object methods *************************/
|
||||
|
||||
/**
|
||||
* Generated by Eclipse
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj;
|
||||
if (allSameType != other.allSameType)
|
||||
return false;
|
||||
if (allTypes != other.allTypes)
|
||||
return false;
|
||||
if (arrayOffset != other.arrayOffset)
|
||||
return false;
|
||||
if (bufferOffset != other.bufferOffset)
|
||||
return false;
|
||||
if (valueLengthWidth != other.valueLengthWidth)
|
||||
return false;
|
||||
if (valueOffsetWidth != other.valueOffsetWidth)
|
||||
return false;
|
||||
if (familyOffsetWidth != other.familyOffsetWidth)
|
||||
return false;
|
||||
if (includesMvccVersion != other.includesMvccVersion)
|
||||
return false;
|
||||
if (maxQualifierLength != other.maxQualifierLength)
|
||||
return false;
|
||||
if (maxRowLength != other.maxRowLength)
|
||||
return false;
|
||||
if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth)
|
||||
return false;
|
||||
if (mvccVersionIndexWidth != other.mvccVersionIndexWidth)
|
||||
return false;
|
||||
if (minMvccVersion != other.minMvccVersion)
|
||||
return false;
|
||||
if (minTimestamp != other.minTimestamp)
|
||||
return false;
|
||||
if (nextNodeOffsetWidth != other.nextNodeOffsetWidth)
|
||||
return false;
|
||||
if (numValueBytes != other.numValueBytes)
|
||||
return false;
|
||||
if (numFamilyBytes != other.numFamilyBytes)
|
||||
return false;
|
||||
if (numMvccVersionBytes != other.numMvccVersionBytes)
|
||||
return false;
|
||||
if (numMetaBytes != other.numMetaBytes)
|
||||
return false;
|
||||
if (numQualifierBytes != other.numQualifierBytes)
|
||||
return false;
|
||||
if (numRowBytes != other.numRowBytes)
|
||||
return false;
|
||||
if (numTimestampBytes != other.numTimestampBytes)
|
||||
return false;
|
||||
if (numUniqueFamilies != other.numUniqueFamilies)
|
||||
return false;
|
||||
if (numUniqueQualifiers != other.numUniqueQualifiers)
|
||||
return false;
|
||||
if (numUniqueRows != other.numUniqueRows)
|
||||
return false;
|
||||
if (numKeyValueBytes != other.numKeyValueBytes)
|
||||
return false;
|
||||
if (qualifierOffsetWidth != other.qualifierOffsetWidth)
|
||||
return false;
|
||||
if (rowTreeDepth != other.rowTreeDepth)
|
||||
return false;
|
||||
if (timestampDeltaWidth != other.timestampDeltaWidth)
|
||||
return false;
|
||||
if (timestampIndexWidth != other.timestampIndexWidth)
|
||||
return false;
|
||||
if (version != other.version)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generated by Eclipse
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + (allSameType ? 1231 : 1237);
|
||||
result = prime * result + allTypes;
|
||||
result = prime * result + arrayOffset;
|
||||
result = prime * result + bufferOffset;
|
||||
result = prime * result + valueLengthWidth;
|
||||
result = prime * result + valueOffsetWidth;
|
||||
result = prime * result + familyOffsetWidth;
|
||||
result = prime * result + (includesMvccVersion ? 1231 : 1237);
|
||||
result = prime * result + maxQualifierLength;
|
||||
result = prime * result + maxRowLength;
|
||||
result = prime * result + mvccVersionDeltaWidth;
|
||||
result = prime * result + mvccVersionIndexWidth;
|
||||
result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32));
|
||||
result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32));
|
||||
result = prime * result + nextNodeOffsetWidth;
|
||||
result = prime * result + numValueBytes;
|
||||
result = prime * result + numFamilyBytes;
|
||||
result = prime * result + numMvccVersionBytes;
|
||||
result = prime * result + numMetaBytes;
|
||||
result = prime * result + numQualifierBytes;
|
||||
result = prime * result + numRowBytes;
|
||||
result = prime * result + numTimestampBytes;
|
||||
result = prime * result + numUniqueFamilies;
|
||||
result = prime * result + numUniqueQualifiers;
|
||||
result = prime * result + numUniqueRows;
|
||||
result = prime * result + numKeyValueBytes;
|
||||
result = prime * result + qualifierOffsetWidth;
|
||||
result = prime * result + rowTreeDepth;
|
||||
result = prime * result + timestampDeltaWidth;
|
||||
result = prime * result + timestampIndexWidth;
|
||||
result = prime * result + version;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generated by Eclipse
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("PtBlockMeta [arrayOffset=");
|
||||
builder.append(arrayOffset);
|
||||
builder.append(", bufferOffset=");
|
||||
builder.append(bufferOffset);
|
||||
builder.append(", version=");
|
||||
builder.append(version);
|
||||
builder.append(", numMetaBytes=");
|
||||
builder.append(numMetaBytes);
|
||||
builder.append(", numKeyValueBytes=");
|
||||
builder.append(numKeyValueBytes);
|
||||
builder.append(", includesMvccVersion=");
|
||||
builder.append(includesMvccVersion);
|
||||
builder.append(", numRowBytes=");
|
||||
builder.append(numRowBytes);
|
||||
builder.append(", numFamilyBytes=");
|
||||
builder.append(numFamilyBytes);
|
||||
builder.append(", numQualifierBytes=");
|
||||
builder.append(numQualifierBytes);
|
||||
builder.append(", numTimestampBytes=");
|
||||
builder.append(numTimestampBytes);
|
||||
builder.append(", numMvccVersionBytes=");
|
||||
builder.append(numMvccVersionBytes);
|
||||
builder.append(", numValueBytes=");
|
||||
builder.append(numValueBytes);
|
||||
builder.append(", nextNodeOffsetWidth=");
|
||||
builder.append(nextNodeOffsetWidth);
|
||||
builder.append(", familyOffsetWidth=");
|
||||
builder.append(familyOffsetWidth);
|
||||
builder.append(", qualifierOffsetWidth=");
|
||||
builder.append(qualifierOffsetWidth);
|
||||
builder.append(", timestampIndexWidth=");
|
||||
builder.append(timestampIndexWidth);
|
||||
builder.append(", mvccVersionIndexWidth=");
|
||||
builder.append(mvccVersionIndexWidth);
|
||||
builder.append(", valueOffsetWidth=");
|
||||
builder.append(valueOffsetWidth);
|
||||
builder.append(", valueLengthWidth=");
|
||||
builder.append(valueLengthWidth);
|
||||
builder.append(", rowTreeDepth=");
|
||||
builder.append(rowTreeDepth);
|
||||
builder.append(", maxRowLength=");
|
||||
builder.append(maxRowLength);
|
||||
builder.append(", maxQualifierLength=");
|
||||
builder.append(maxQualifierLength);
|
||||
builder.append(", minTimestamp=");
|
||||
builder.append(minTimestamp);
|
||||
builder.append(", timestampDeltaWidth=");
|
||||
builder.append(timestampDeltaWidth);
|
||||
builder.append(", minMvccVersion=");
|
||||
builder.append(minMvccVersion);
|
||||
builder.append(", mvccVersionDeltaWidth=");
|
||||
builder.append(mvccVersionDeltaWidth);
|
||||
builder.append(", allSameType=");
|
||||
builder.append(allSameType);
|
||||
builder.append(", allTypes=");
|
||||
builder.append(allTypes);
|
||||
builder.append(", numUniqueRows=");
|
||||
builder.append(numUniqueRows);
|
||||
builder.append(", numUniqueFamilies=");
|
||||
builder.append(numUniqueFamilies);
|
||||
builder.append(", numUniqueQualifiers=");
|
||||
builder.append(numUniqueQualifiers);
|
||||
builder.append("]");
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
|
||||
/************** absolute getters *******************/
|
||||
|
||||
public int getAbsoluteMetaOffset() {
|
||||
return arrayOffset + bufferOffset;
|
||||
}
|
||||
|
||||
public int getAbsoluteRowOffset() {
|
||||
return getAbsoluteMetaOffset() + numMetaBytes;
|
||||
}
|
||||
|
||||
public int getAbsoluteFamilyOffset() {
|
||||
return getAbsoluteRowOffset() + numRowBytes;
|
||||
}
|
||||
|
||||
public int getAbsoluteQualifierOffset() {
|
||||
return getAbsoluteFamilyOffset() + numFamilyBytes;
|
||||
}
|
||||
|
||||
public int getAbsoluteTimestampOffset() {
|
||||
return getAbsoluteQualifierOffset() + numQualifierBytes;
|
||||
}
|
||||
|
||||
public int getAbsoluteMvccVersionOffset() {
|
||||
return getAbsoluteTimestampOffset() + numTimestampBytes;
|
||||
}
|
||||
|
||||
public int getAbsoluteValueOffset() {
|
||||
return getAbsoluteMvccVersionOffset() + numMvccVersionBytes;
|
||||
}
|
||||
|
||||
|
||||
/*************** get/set ***************************/
|
||||
|
||||
public int getTimestampDeltaWidth() {
|
||||
return timestampDeltaWidth;
|
||||
}
|
||||
|
||||
public void setTimestampDeltaWidth(int timestampDeltaWidth) {
|
||||
this.timestampDeltaWidth = timestampDeltaWidth;
|
||||
}
|
||||
|
||||
public int getValueOffsetWidth() {
|
||||
return valueOffsetWidth;
|
||||
}
|
||||
|
||||
public void setValueOffsetWidth(int dataOffsetWidth) {
|
||||
this.valueOffsetWidth = dataOffsetWidth;
|
||||
}
|
||||
|
||||
public int getValueLengthWidth() {
|
||||
return valueLengthWidth;
|
||||
}
|
||||
|
||||
public void setValueLengthWidth(int dataLengthWidth) {
|
||||
this.valueLengthWidth = dataLengthWidth;
|
||||
}
|
||||
|
||||
public int getMaxRowLength() {
|
||||
return maxRowLength;
|
||||
}
|
||||
|
||||
public void setMaxRowLength(int maxRowLength) {
|
||||
this.maxRowLength = maxRowLength;
|
||||
}
|
||||
|
||||
public long getMinTimestamp() {
|
||||
return minTimestamp;
|
||||
}
|
||||
|
||||
public void setMinTimestamp(long minTimestamp) {
|
||||
this.minTimestamp = minTimestamp;
|
||||
}
|
||||
|
||||
public byte getAllTypes() {
|
||||
return allTypes;
|
||||
}
|
||||
|
||||
public void setAllTypes(byte allTypes) {
|
||||
this.allTypes = allTypes;
|
||||
}
|
||||
|
||||
public boolean isAllSameType() {
|
||||
return allSameType;
|
||||
}
|
||||
|
||||
public void setAllSameType(boolean allSameType) {
|
||||
this.allSameType = allSameType;
|
||||
}
|
||||
|
||||
public int getNextNodeOffsetWidth() {
|
||||
return nextNodeOffsetWidth;
|
||||
}
|
||||
|
||||
public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) {
|
||||
this.nextNodeOffsetWidth = nextNodeOffsetWidth;
|
||||
}
|
||||
|
||||
public int getNumRowBytes() {
|
||||
return numRowBytes;
|
||||
}
|
||||
|
||||
public void setNumRowBytes(int numRowBytes) {
|
||||
this.numRowBytes = numRowBytes;
|
||||
}
|
||||
|
||||
public int getNumTimestampBytes() {
|
||||
return numTimestampBytes;
|
||||
}
|
||||
|
||||
public void setNumTimestampBytes(int numTimestampBytes) {
|
||||
this.numTimestampBytes = numTimestampBytes;
|
||||
}
|
||||
|
||||
public int getNumValueBytes() {
|
||||
return numValueBytes;
|
||||
}
|
||||
|
||||
public void setNumValueBytes(int numValueBytes) {
|
||||
this.numValueBytes = numValueBytes;
|
||||
}
|
||||
|
||||
public int getNumMetaBytes() {
|
||||
return numMetaBytes;
|
||||
}
|
||||
|
||||
public void setNumMetaBytes(int numMetaBytes) {
|
||||
this.numMetaBytes = numMetaBytes;
|
||||
}
|
||||
|
||||
public int getArrayOffset() {
|
||||
return arrayOffset;
|
||||
}
|
||||
|
||||
public void setArrayOffset(int arrayOffset) {
|
||||
this.arrayOffset = arrayOffset;
|
||||
}
|
||||
|
||||
public int getBufferOffset() {
|
||||
return bufferOffset;
|
||||
}
|
||||
|
||||
public void setBufferOffset(int bufferOffset) {
|
||||
this.bufferOffset = bufferOffset;
|
||||
}
|
||||
|
||||
public int getNumKeyValueBytes() {
|
||||
return numKeyValueBytes;
|
||||
}
|
||||
|
||||
public void setNumKeyValueBytes(int numKeyValueBytes) {
|
||||
this.numKeyValueBytes = numKeyValueBytes;
|
||||
}
|
||||
|
||||
public int getRowTreeDepth() {
|
||||
return rowTreeDepth;
|
||||
}
|
||||
|
||||
public void setRowTreeDepth(int rowTreeDepth) {
|
||||
this.rowTreeDepth = rowTreeDepth;
|
||||
}
|
||||
|
||||
public int getNumMvccVersionBytes() {
|
||||
return numMvccVersionBytes;
|
||||
}
|
||||
|
||||
public void setNumMvccVersionBytes(int numMvccVersionBytes) {
|
||||
this.numMvccVersionBytes = numMvccVersionBytes;
|
||||
}
|
||||
|
||||
public int getMvccVersionDeltaWidth() {
|
||||
return mvccVersionDeltaWidth;
|
||||
}
|
||||
|
||||
public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) {
|
||||
this.mvccVersionDeltaWidth = mvccVersionDeltaWidth;
|
||||
}
|
||||
|
||||
public long getMinMvccVersion() {
|
||||
return minMvccVersion;
|
||||
}
|
||||
|
||||
public void setMinMvccVersion(long minMvccVersion) {
|
||||
this.minMvccVersion = minMvccVersion;
|
||||
}
|
||||
|
||||
public int getNumFamilyBytes() {
|
||||
return numFamilyBytes;
|
||||
}
|
||||
|
||||
public void setNumFamilyBytes(int numFamilyBytes) {
|
||||
this.numFamilyBytes = numFamilyBytes;
|
||||
}
|
||||
|
||||
public int getFamilyOffsetWidth() {
|
||||
return familyOffsetWidth;
|
||||
}
|
||||
|
||||
public void setFamilyOffsetWidth(int familyOffsetWidth) {
|
||||
this.familyOffsetWidth = familyOffsetWidth;
|
||||
}
|
||||
|
||||
public int getNumUniqueRows() {
|
||||
return numUniqueRows;
|
||||
}
|
||||
|
||||
public void setNumUniqueRows(int numUniqueRows) {
|
||||
this.numUniqueRows = numUniqueRows;
|
||||
}
|
||||
|
||||
public int getNumUniqueFamilies() {
|
||||
return numUniqueFamilies;
|
||||
}
|
||||
|
||||
public void setNumUniqueFamilies(int numUniqueFamilies) {
|
||||
this.numUniqueFamilies = numUniqueFamilies;
|
||||
}
|
||||
|
||||
public int getNumUniqueQualifiers() {
|
||||
return numUniqueQualifiers;
|
||||
}
|
||||
|
||||
public void setNumUniqueQualifiers(int numUniqueQualifiers) {
|
||||
this.numUniqueQualifiers = numUniqueQualifiers;
|
||||
}
|
||||
|
||||
public int getNumQualifierBytes() {
|
||||
return numQualifierBytes;
|
||||
}
|
||||
|
||||
public void setNumQualifierBytes(int numQualifierBytes) {
|
||||
this.numQualifierBytes = numQualifierBytes;
|
||||
}
|
||||
|
||||
public int getQualifierOffsetWidth() {
|
||||
return qualifierOffsetWidth;
|
||||
}
|
||||
|
||||
public void setQualifierOffsetWidth(int qualifierOffsetWidth) {
|
||||
this.qualifierOffsetWidth = qualifierOffsetWidth;
|
||||
}
|
||||
|
||||
public int getMaxQualifierLength() {
|
||||
return maxQualifierLength;
|
||||
}
|
||||
|
||||
public void setMaxQualifierLength(int maxQualifierLength) {
|
||||
this.maxQualifierLength = maxQualifierLength;
|
||||
}
|
||||
|
||||
public int getTimestampIndexWidth() {
|
||||
return timestampIndexWidth;
|
||||
}
|
||||
|
||||
public void setTimestampIndexWidth(int timestampIndexWidth) {
|
||||
this.timestampIndexWidth = timestampIndexWidth;
|
||||
}
|
||||
|
||||
public int getMvccVersionIndexWidth() {
|
||||
return mvccVersionIndexWidth;
|
||||
}
|
||||
|
||||
public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) {
|
||||
this.mvccVersionIndexWidth = mvccVersionIndexWidth;
|
||||
}
|
||||
|
||||
public int getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(int version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
public boolean isIncludesMvccVersion() {
|
||||
return includesMvccVersion;
|
||||
}
|
||||
|
||||
public void setIncludesMvccVersion(boolean includesMvccVersion) {
|
||||
this.includesMvccVersion = includesMvccVersion;
|
||||
}
|
||||
|
||||
}
|
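Because every persisted field is an unsigned VInt or VLong, the block meta is cheap to write and to re-read when a block is opened. A round-trip sketch using only the methods shown above (field values are arbitrary):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;

public class BlockMetaRoundTripExample {
  public static void main(String[] args) throws IOException {
    PrefixTreeBlockMeta written = new PrefixTreeBlockMeta();
    written.setVersion(PrefixTreeBlockMeta.VERSION);
    written.setMinTimestamp(1360000000000L);
    written.setNumUniqueRows(42);
    written.setNumMetaBytes(written.calculateNumMetaBytes());

    // serialize all persisted fields as UVInts/UVLongs
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    written.writeVariableBytesToOutputStream(out);

    // re-read them, as a decoder does when a block is opened
    PrefixTreeBlockMeta read =
        new PrefixTreeBlockMeta(new ByteArrayInputStream(out.toByteArray()));
    System.out.println(read.getMinTimestamp() == written.getMinTimestamp());    // true
    System.out.println(read.getNumUniqueRows() == written.getNumUniqueRows());  // true
  }
}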
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValue.KeyComparator;
|
||||
import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator;
|
||||
import org.apache.hadoop.hbase.KeyValue.RootKeyComparator;
|
||||
import org.apache.hadoop.hbase.KeyValueTool;
|
||||
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
|
||||
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
|
||||
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
|
||||
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
|
||||
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
|
||||
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
|
||||
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
|
||||
import org.apache.hadoop.hbase.io.hfile.BlockType;
|
||||
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||
import org.apache.hadoop.io.RawComparator;
|
||||
import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
|
||||
import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
|
||||
import org.apache.hbase.codec.prefixtree.encode.EncoderFactory;
|
||||
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
|
||||
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
|
||||
|
||||
/**
|
||||
* This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or
|
||||
* package changes.
|
||||
* <p/>
|
||||
* PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point
|
||||
* for PrefixTree encoding and decoding. Encoding is delegated to instances of
|
||||
* {@link PrefixTreeEncoder}, and decoding is delegated to instances of
|
||||
* {@link org.apache.hbase.codec.prefixtree.scanner.CellSearcher}. Encoder and decoder instances are
|
||||
* created and recycled by static PtEncoderFactory and PtDecoderFactory.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeCodec implements DataBlockEncoder{
|
||||
|
||||
/**
|
||||
* no-arg constructor for reflection
|
||||
*/
|
||||
public PrefixTreeCodec() {
|
||||
}
|
||||
|
||||
/**
 * Copied from BufferedDataBlockEncoder. Almost certainly can be improved, but I'm not familiar
 * enough with the concept of the HFileBlockEncodingContext.
 */
|
||||
@Override
|
||||
public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion,
|
||||
HFileBlockEncodingContext blkEncodingCtx) throws IOException {
|
||||
if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
|
||||
throw new IOException(this.getClass().getName() + " only accepts "
|
||||
+ HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context.");
|
||||
}
|
||||
|
||||
HFileBlockDefaultEncodingContext encodingCtx
|
||||
= (HFileBlockDefaultEncodingContext) blkEncodingCtx;
|
||||
encodingCtx.prepareEncoding();
|
||||
DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder();
|
||||
internalEncodeKeyValues(dataOut, in, includesMvccVersion);
|
||||
|
||||
//do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE?
|
||||
if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
|
||||
encodingCtx.postEncoding(BlockType.ENCODED_DATA);
|
||||
} else {
|
||||
encodingCtx.postEncoding(BlockType.DATA);
|
||||
}
|
||||
}
|
||||
|
||||
private void internalEncodeKeyValues(DataOutputStream encodedOutputStream,
|
||||
ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException {
|
||||
rawKeyValues.rewind();
|
||||
PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion);
|
||||
|
||||
try{
|
||||
KeyValue kv;
|
||||
while ((kv = KeyValueTool.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) {
|
||||
builder.write(kv);
|
||||
}
|
||||
builder.flush();
|
||||
}finally{
|
||||
EncoderFactory.checkIn(builder);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion)
|
||||
throws IOException {
|
||||
return decodeKeyValues(source, 0, 0, includesMvccVersion);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* I don't think this method is called during normal HBase operation, so efficiency is not
|
||||
* important.
|
||||
*/
|
||||
@Override
|
||||
public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
|
||||
int skipLastBytes, boolean includesMvccVersion) throws IOException {
|
||||
ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste
|
||||
sourceAsBuffer.mark();
|
||||
PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer);
|
||||
sourceAsBuffer.rewind();
|
||||
int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes();
|
||||
byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader];
|
||||
ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader);
|
||||
result.rewind();
|
||||
CellSearcher searcher = null;
|
||||
try {
|
||||
searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion);
|
||||
while (searcher.next()) {
|
||||
KeyValue currentCell = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
|
||||
// needs to be modified for DirectByteBuffers. no existing methods to
|
||||
// write VLongs to byte[]
|
||||
int offset = result.arrayOffset() + result.position();
|
||||
KeyValueTool.appendToByteArray(currentCell, result.array(), offset);
|
||||
int keyValueLength = KeyValueTool.length(currentCell);
|
||||
ByteBufferUtils.skip(result, keyValueLength);
|
||||
offset += keyValueLength;
|
||||
if (includesMvccVersion) {
|
||||
ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion());
|
||||
}
|
||||
}
|
||||
result.position(result.limit());//make it appear as if we were appending
|
||||
return result;
|
||||
} finally {
|
||||
DecoderFactory.checkIn(searcher);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
|
||||
block.rewind();
|
||||
PrefixTreeArraySearcher searcher = null;
|
||||
try {
|
||||
//should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will
|
||||
searcher = DecoderFactory.checkOut(block, true);
|
||||
if (!searcher.positionAtFirstCell()) {
|
||||
return null;
|
||||
}
|
||||
return KeyValueTool.copyKeyToNewByteBuffer(searcher.getCurrent());
|
||||
} finally {
|
||||
DecoderFactory.checkIn(searcher);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm,
|
||||
DataBlockEncoding encoding, byte[] header) {
|
||||
if(DataBlockEncoding.PREFIX_TREE != encoding){
|
||||
//i'm not sure why encoding is in the interface. Each encoder implementation should probably
|
||||
//know it's encoding type
|
||||
throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported");
|
||||
}
|
||||
return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) {
|
||||
return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this the correct handling of an illegal comparator? How to prevent that from getting all
|
||||
* the way to this point.
|
||||
*/
|
||||
@Override
|
||||
public EncodedSeeker createSeeker(RawComparator<byte[]> comparator, boolean includesMvccVersion) {
|
||||
if(! (comparator instanceof KeyComparator)){
|
||||
throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator");
|
||||
}
|
||||
if(comparator instanceof MetaKeyComparator){
|
||||
throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with META "
|
||||
+"table");
|
||||
}
|
||||
if(comparator instanceof RootKeyComparator){
|
||||
throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with ROOT "
|
||||
+"table");
|
||||
}
|
||||
|
||||
return new PrefixTreeSeeker(includesMvccVersion);
|
||||
}
|
||||
|
||||
}
|
|
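A minimal usage sketch (not part of this patch) of the codec above; the PrefixTreeCodec default constructor, the KeyValue.KEY_COMPARATOR constant, and the blockBuffer variable are assumptions for illustration.

PrefixTreeCodec codec = new PrefixTreeCodec();
EncodedSeeker seeker = codec.createSeeker(KeyValue.KEY_COMPARATOR, false /*includesMvccVersion*/);
seeker.setCurrentBuffer(blockBuffer);   // parses the block meta and positions on the first cell
do {                                    // assumes a non-empty block
  KeyValue kv = seeker.getKeyValue();   // deep copy of the current cell
  // ... use kv ...
} while (seeker.next());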
@ -0,0 +1,216 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueTool;
|
||||
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
|
||||
import org.apache.hbase.Cell;
|
||||
import org.apache.hbase.cell.CellScannerPosition;
|
||||
import org.apache.hbase.cell.CellTool;
|
||||
import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
|
||||
import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
|
||||
|
||||
/**
|
||||
* These methods have the same definition as any implementation of the EncodedSeeker.
|
||||
*
|
||||
* In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It
|
||||
* currently returns a new KeyValue object each time getKeyValue is called. This is not horrible,
|
||||
* but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in
|
||||
* the data from the PrefixTreeCell. It is somewhat heavyweight right now.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeSeeker implements EncodedSeeker {
|
||||
|
||||
protected ByteBuffer block;
|
||||
protected boolean includeMvccVersion;
|
||||
protected PrefixTreeArraySearcher ptSearcher;
|
||||
|
||||
public PrefixTreeSeeker(boolean includeMvccVersion) {
|
||||
this.includeMvccVersion = includeMvccVersion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCurrentBuffer(ByteBuffer fullBlockBuffer) {
|
||||
block = fullBlockBuffer;
|
||||
ptSearcher = DecoderFactory.checkOut(block, includeMvccVersion);
|
||||
rewind();
|
||||
}
|
||||
|
||||
/**
|
||||
* Currently unused.
|
||||
* <p/>
|
||||
* TODO: performance leak; the searchers should be reused, but HBase does not currently provide
|
||||
* a hook from which this can be called.
|
||||
*/
|
||||
public void releaseCurrentSearcher(){
|
||||
DecoderFactory.checkIn(ptSearcher);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ByteBuffer getKeyDeepCopy() {
|
||||
return KeyValueTool.copyKeyToNewByteBuffer(ptSearcher.getCurrent());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ByteBuffer getValueShallowCopy() {
|
||||
return CellTool.getValueBufferShallowCopy(ptSearcher.getCurrent());
|
||||
}
|
||||
|
||||
/**
|
||||
* currently must do deep copy into new array
|
||||
*/
|
||||
@Override
|
||||
public ByteBuffer getKeyValueBuffer() {
|
||||
return KeyValueTool.copyToNewByteBuffer(ptSearcher.getCurrent());
|
||||
}
|
||||
|
||||
/**
|
||||
* currently must do deep copy into new array
|
||||
*/
|
||||
@Override
|
||||
public KeyValue getKeyValue() {
|
||||
return KeyValueTool.copyToNewKeyValue(ptSearcher.getCurrent());
|
||||
}
|
||||
|
||||
/**
|
||||
* Currently unused.
|
||||
* <p/>
|
||||
* A nice, lightweight reference, though the underlying cell is transient. This method may return
|
||||
* the same reference to the backing PrefixTreeCell repeatedly, while other implementations may
|
||||
* return a different reference for each Cell.
|
||||
* <p/>
|
||||
* The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to use
|
||||
* this method instead of the getKeyValue() methods above.
|
||||
*/
|
||||
// @Override
|
||||
public Cell getCurrent() {
|
||||
return ptSearcher.getCurrent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rewind() {
|
||||
ptSearcher.positionAtFirstCell();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
return ptSearcher.next();
|
||||
}
|
||||
|
||||
// @Override
|
||||
public boolean advance() {
|
||||
return ptSearcher.next();
|
||||
}
|
||||
|
||||
|
||||
private static final boolean USE_POSITION_BEFORE = false;
|
||||
|
||||
/**
|
||||
* Seek forward only (should be called reseekToKeyInBlock?).
|
||||
* <p/>
|
||||
* If the exact key is found look at the seekBefore variable and:<br/>
|
||||
* - if true: back up to the previous key<br/>
|
||||
* - if false: stay on the exact key
|
||||
* <p/>
|
||||
* If the exact key is not found, position on the previous key *if possible*, remembering to
|
||||
* leave the scanner in a valid state.
|
||||
* <p/>
|
||||
* @param keyOnlyBytes KeyValue format of a Cell's key at which to position the seeker
|
||||
* @param offset offset into the keyOnlyBytes array
|
||||
* @param length number of bytes of the keyOnlyBytes array to use
|
||||
* @param forceBeforeOnExactMatch if an exact match is found and seekBefore=true, back up one Cell
|
||||
* @return 0 if the seeker is on the exact key<br/>
|
||||
* 1 if the seeker is not on the key for any reason, including seekBefore being true
|
||||
*/
|
||||
@Override
|
||||
public int seekToKeyInBlock(byte[] keyOnlyBytes, int offset, int length,
|
||||
boolean forceBeforeOnExactMatch) {
|
||||
if (USE_POSITION_BEFORE) {
|
||||
return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length,
|
||||
forceBeforeOnExactMatch);
|
||||
}else{
|
||||
return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length,
|
||||
forceBeforeOnExactMatch);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Support both of these options since the underlying PrefixTree supports both. Possibly
|
||||
* expand the EncodedSeeker to utilize them both.
|
||||
*/
|
||||
|
||||
protected int seekToOrBeforeUsingPositionAtOrBefore(byte[] keyOnlyBytes, int offset, int length,
|
||||
boolean forceBeforeOnExactMatch){
|
||||
// this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
|
||||
KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);
|
||||
|
||||
CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv);
|
||||
|
||||
if(CellScannerPosition.AT == position){
|
||||
if (forceBeforeOnExactMatch) {
|
||||
ptSearcher.previous();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length,
|
||||
boolean forceBeforeOnExactMatch){
|
||||
// this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
|
||||
KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);
|
||||
|
||||
//should probably switch this to use the seekForwardToOrBefore method
|
||||
CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv);
|
||||
|
||||
if(CellScannerPosition.AT == position){
|
||||
if (forceBeforeOnExactMatch) {
|
||||
ptSearcher.previous();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
if(CellScannerPosition.AFTER == position){
|
||||
if(!ptSearcher.isBeforeFirst()){
|
||||
ptSearcher.previous();
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(position == CellScannerPosition.AFTER_LAST){
|
||||
return 1;
|
||||
}
|
||||
|
||||
throw new RuntimeException("unexpected CellScannerPosition:"+position);
|
||||
}
|
||||
|
||||
}
|
|
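A short sketch of the seekToKeyInBlock return-value contract documented above (hypothetical seeker and keyBytes, after setCurrentBuffer has been called):

int rc = seeker.seekToKeyInBlock(keyBytes, 0, keyBytes.length, false /*forceBeforeOnExactMatch*/);
if (rc == 0) {
  // positioned exactly on the requested key
} else {
  // rc == 1: not on the exact key, typically positioned on the nearest preceding cell
}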
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Pools PrefixTreeArraySearcher objects. Each Searcher can consist of hundreds or thousands of
|
||||
* objects and 1 is needed for each HFile during a Get operation. With tens of thousands of
|
||||
* Gets/second, reusing these searchers may save a lot of young gen collections.
|
||||
* <p/>
|
||||
* Alternative implementation would be a ByteBufferSearcherPool (not implemented yet).
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ArraySearcherPool {
|
||||
|
||||
/**
|
||||
* One decoder is needed per storefile per Get operation, so hundreds may be in use at the
|
||||
* same time; however, decoding is a CPU-bound activity, so the pool is limited to something in the
|
||||
* realm of the maximum reasonable number of active threads.
|
||||
*/
|
||||
private static final Integer MAX_POOL_SIZE = 1000;
|
||||
|
||||
protected Queue<PrefixTreeArraySearcher> pool
|
||||
= new LinkedBlockingQueue<PrefixTreeArraySearcher>(MAX_POOL_SIZE);
|
||||
|
||||
public PrefixTreeArraySearcher checkOut(ByteBuffer buffer, boolean includesMvccVersion) {
|
||||
PrefixTreeArraySearcher searcher = pool.poll();//will return null if pool is empty
|
||||
searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
public void checkIn(PrefixTreeArraySearcher searcher) {
|
||||
searcher.releaseBlockReference();
|
||||
pool.offer(searcher);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return ("poolSize:" + pool.size());
|
||||
}
|
||||
|
||||
}
|
|
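A minimal sketch of the intended check-out/check-in pattern (hypothetical caller; the blockBuffer variable is assumed):

ArraySearcherPool pool = new ArraySearcherPool();
PrefixTreeArraySearcher searcher = pool.checkOut(blockBuffer, false /*includesMvccVersion*/);
try {
  while (searcher.next()) {
    // read searcher.getCurrent() ...
  }
} finally {
  pool.checkIn(searcher);// drops the block reference; offer() silently discards the searcher if the pool is full
}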
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
|
||||
|
||||
/**
|
||||
* Static wrapper class for the ArraySearcherPool.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class DecoderFactory {
|
||||
|
||||
private static final ArraySearcherPool POOL = new ArraySearcherPool();
|
||||
|
||||
//TODO will need a PrefixTreeSearcher on top of CellSearcher
|
||||
public static PrefixTreeArraySearcher checkOut(final ByteBuffer buffer,
|
||||
boolean includeMvccVersion) {
|
||||
if (buffer.isDirect()) {
|
||||
throw new IllegalArgumentException("DirectByteBuffers not supported yet");
|
||||
// TODO implement PtByteBufferBlockScanner
|
||||
}
|
||||
|
||||
PrefixTreeArraySearcher searcher = POOL.checkOut(buffer,
|
||||
includeMvccVersion);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
public static void checkIn(CellSearcher pSearcher) {
|
||||
if (pSearcher == null) {
|
||||
return;
|
||||
}
|
||||
if (! (pSearcher instanceof PrefixTreeArraySearcher)) {
|
||||
throw new IllegalArgumentException("Cannot return "+pSearcher.getClass()+" to "
|
||||
+DecoderFactory.class);
|
||||
}
|
||||
PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher;
|
||||
POOL.checkIn(searcher);
|
||||
}
|
||||
|
||||
|
||||
/**************************** helper ******************************/
|
||||
|
||||
public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer,
|
||||
PrefixTreeArraySearcher searcher, boolean includeMvccVersion) {
|
||||
if (searcher == null) {
|
||||
PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer);
|
||||
searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(),
|
||||
blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength());
|
||||
searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta();
|
||||
blockMeta.initOnBlock(buffer);
|
||||
if (!searcher.areBuffersBigEnough()) {
|
||||
int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(),
|
||||
searcher.getMaxRowTreeStackNodes());
|
||||
int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength());
|
||||
int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(),
|
||||
searcher.getQualifierBufferLength());
|
||||
searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength,
|
||||
qualifierBufferLength);
|
||||
}
|
||||
//this is where we parse the BlockMeta
|
||||
searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.scanner.ReversibleCellScanner;
|
||||
|
||||
/**
|
||||
* Methods for going backwards through a PrefixTree block. This class is split out on its own to
|
||||
* simplify the Scanner superclass and Searcher subclass.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements
|
||||
ReversibleCellScanner {
|
||||
|
||||
/***************** construct ******************************/
|
||||
|
||||
public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
|
||||
int rowBufferLength, int qualifierBufferLength) {
|
||||
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
|
||||
}
|
||||
|
||||
|
||||
/***************** Object methods ***************************/
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
//trivial override to confirm intent (findbugs)
|
||||
return super.equals(obj);
|
||||
}
|
||||
|
||||
|
||||
/***************** methods **********************************/
|
||||
|
||||
@Override
|
||||
public boolean previous() {
|
||||
if (afterLast) {
|
||||
afterLast = false;
|
||||
positionAtLastCell();
|
||||
return true;
|
||||
}
|
||||
if (beforeFirst) {
|
||||
return false;
|
||||
}
|
||||
if (isFirstCellInRow()) {
|
||||
previousRowInternal();
|
||||
if (beforeFirst) {
|
||||
return false;
|
||||
}
|
||||
populateLastNonRowFields();
|
||||
return true;
|
||||
}
|
||||
populatePreviousNonRowFields();
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean previousRow(boolean endOfRow) {
|
||||
previousRowInternal();
|
||||
if(beforeFirst){
|
||||
return false;
|
||||
}
|
||||
if(endOfRow){
|
||||
populateLastNonRowFields();
|
||||
}else{
|
||||
populateFirstNonRowFields();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean previousRowInternal() {
|
||||
if (beforeFirst) {
|
||||
return false;
|
||||
}
|
||||
if (afterLast) {
|
||||
positionAtLastRow();
|
||||
return true;
|
||||
}
|
||||
if (currentRowNode.hasOccurrences()) {
|
||||
discardCurrentRowNode(false);
|
||||
if(currentRowNode==null){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
while (!beforeFirst) {
|
||||
if (isDirectlyAfterNub()) {//we are about to back up to the nub
|
||||
currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf
|
||||
nubCellsRemain = true;//this positions us on the nub
|
||||
return true;
|
||||
}
|
||||
if (currentRowNode.hasPreviousFanNodes()) {
|
||||
followPreviousFan();
|
||||
descendToLastRowFromCurrentPosition();
|
||||
} else {// keep going up the stack until we find previous fan positions
|
||||
discardCurrentRowNode(false);
|
||||
if(currentRowNode==null){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (currentRowNode.hasOccurrences()) {// escape clause
|
||||
return true;// found some values
|
||||
}
|
||||
}
|
||||
return false;// went past the beginning
|
||||
}
|
||||
|
||||
protected boolean isDirectlyAfterNub() {
|
||||
return currentRowNode.isNub() && currentRowNode.getFanIndex()==0;
|
||||
}
|
||||
|
||||
protected void positionAtLastRow() {
|
||||
reInitFirstNode();
|
||||
descendToLastRowFromCurrentPosition();
|
||||
}
|
||||
|
||||
protected void descendToLastRowFromCurrentPosition() {
|
||||
while (currentRowNode.hasChildren()) {
|
||||
followLastFan();
|
||||
}
|
||||
}
|
||||
|
||||
protected void positionAtLastCell() {
|
||||
positionAtLastRow();
|
||||
populateLastNonRowFields();
|
||||
}
|
||||
|
||||
}
|
|
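A sketch of reverse traversal built on the methods above, assuming a searcher subclass that exposes positionAfterLastCell():

searcher.positionAfterLastCell();   // start past the end of the block
while (searcher.previous()) {       // the first call lands on the last cell
  Cell c = searcher.getCurrent();
  // ... process cells from last to first ...
}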
@ -0,0 +1,506 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.Cell;
|
||||
import org.apache.hbase.cell.CellComparator;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader;
|
||||
import org.apache.hbase.codec.prefixtree.decode.row.RowNodeReader;
|
||||
import org.apache.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder;
|
||||
import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
|
||||
import org.apache.hbase.codec.prefixtree.scanner.CellScanner;
|
||||
|
||||
/**
|
||||
* Extends PrefixTreeCell and manipulates its protected fields. Could alternatively contain a PrefixTreeCell and
|
||||
* call get/set methods.
|
||||
*
|
||||
* This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This
|
||||
* implementation requires that the bytes be in a normal java byte[] for performance. The
|
||||
* alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer
|
||||
* without copying the whole buffer on-heap.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner {
|
||||
|
||||
/***************** fields ********************************/
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
|
||||
protected boolean beforeFirst;
|
||||
protected boolean afterLast;
|
||||
|
||||
protected RowNodeReader[] rowNodes;
|
||||
protected int rowNodeStackIndex;
|
||||
|
||||
protected RowNodeReader currentRowNode;
|
||||
protected ColumnReader familyReader;
|
||||
protected ColumnReader qualifierReader;
|
||||
protected TimestampDecoder timestampDecoder;
|
||||
protected MvccVersionDecoder mvccVersionDecoder;
|
||||
|
||||
protected boolean nubCellsRemain;
|
||||
protected int currentCellIndex;
|
||||
|
||||
|
||||
/*********************** construct ******************************/
|
||||
|
||||
// pass in blockMeta so we can initialize buffers big enough for all cells in the block
|
||||
public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
|
||||
int rowBufferLength, int qualifierBufferLength) {
|
||||
this.rowNodes = new RowNodeReader[rowTreeDepth];
|
||||
for (int i = 0; i < rowNodes.length; ++i) {
|
||||
rowNodes[i] = new RowNodeReader();
|
||||
}
|
||||
this.rowBuffer = new byte[rowBufferLength];
|
||||
this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH];
|
||||
this.familyReader = new ColumnReader(familyBuffer, true);
|
||||
this.qualifierBuffer = new byte[qualifierBufferLength];
|
||||
this.qualifierReader = new ColumnReader(qualifierBuffer, false);
|
||||
this.timestampDecoder = new TimestampDecoder();
|
||||
this.mvccVersionDecoder = new MvccVersionDecoder();
|
||||
}
|
||||
|
||||
|
||||
/**************** init helpers ***************************************/
|
||||
|
||||
/**
|
||||
* Call when first accessing a block.
|
||||
* @return true if this scanner's buffers are big enough for the block; if false, an entirely new scanner is needed
|
||||
*/
|
||||
public boolean areBuffersBigEnough() {
|
||||
if (rowNodes.length < blockMeta.getRowTreeDepth()) {
|
||||
return false;
|
||||
}
|
||||
if (rowBuffer.length < blockMeta.getMaxRowLength()) {
|
||||
return false;
|
||||
}
|
||||
if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, boolean includeMvccVersion) {
|
||||
this.block = block;
|
||||
this.blockMeta = blockMeta;
|
||||
this.familyOffset = familyBuffer.length;
|
||||
this.familyReader.initOnBlock(blockMeta, block);
|
||||
this.qualifierOffset = qualifierBuffer.length;
|
||||
this.qualifierReader.initOnBlock(blockMeta, block);
|
||||
this.timestampDecoder.initOnBlock(blockMeta, block);
|
||||
this.mvccVersionDecoder.initOnBlock(blockMeta, block);
|
||||
this.includeMvccVersion = includeMvccVersion;
|
||||
resetToBeforeFirstEntry();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resetToBeforeFirstEntry() {
|
||||
beforeFirst = true;
|
||||
afterLast = false;
|
||||
rowNodeStackIndex = -1;
|
||||
currentRowNode = null;
|
||||
rowLength = 0;
|
||||
familyOffset = familyBuffer.length;
|
||||
familyLength = 0;
|
||||
qualifierOffset = blockMeta.getMaxQualifierLength();
|
||||
qualifierLength = 0;
|
||||
nubCellsRemain = false;
|
||||
currentCellIndex = -1;
|
||||
timestamp = -1L;
|
||||
type = DEFAULT_TYPE;
|
||||
absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized
|
||||
valueLength = 0;// use 0 rather than -1, which caused an uninitialized Cell to add up to the wrong length
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this before putting the scanner back into a pool so it doesn't hold the last used block
|
||||
* in memory.
|
||||
*/
|
||||
public void releaseBlockReference(){
|
||||
block = null;
|
||||
}
|
||||
|
||||
|
||||
/********************** CellScanner **********************/
|
||||
|
||||
@Override
|
||||
public PrefixTreeCell getCurrent() {
|
||||
if(isOutOfBounds()){
|
||||
return null;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/******************* Object methods ************************/
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
//trivial override to confirm intent (findbugs)
|
||||
return super.equals(obj);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return super.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Override PrefixTreeCell.toString() with a check to see if the current cell is valid.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
PrefixTreeCell currentCell = getCurrent();
|
||||
if(currentCell==null){
|
||||
return "null";
|
||||
}
|
||||
return currentCell.getKeyValueString();
|
||||
}
|
||||
|
||||
|
||||
/******************* advance ***************************/
|
||||
|
||||
public boolean positionAtFirstCell() {
|
||||
reInitFirstNode();
|
||||
return next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
if (afterLast) {
|
||||
return false;
|
||||
}
|
||||
if (!hasOccurrences()) {
|
||||
resetToBeforeFirstEntry();
|
||||
}
|
||||
if (beforeFirst || isLastCellInRow()) {
|
||||
nextRow();
|
||||
if (afterLast) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
++currentCellIndex;
|
||||
}
|
||||
|
||||
populateNonRowFields(currentCellIndex);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public boolean nextRow() {
|
||||
nextRowInternal();
|
||||
if (afterLast) {
|
||||
return false;
|
||||
}
|
||||
populateNonRowFields(currentCellIndex);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method is safe to call when the scanner is not on a fully valid row node, as in the case
|
||||
* of a row token miss in the Searcher
|
||||
* @return true if we are positioned on a valid row, false if past end of block
|
||||
*/
|
||||
protected boolean nextRowInternal() {
|
||||
if (afterLast) {
|
||||
return false;
|
||||
}
|
||||
if (beforeFirst) {
|
||||
initFirstNode();
|
||||
if (currentRowNode.hasOccurrences()) {
|
||||
if (currentRowNode.isNub()) {
|
||||
nubCellsRemain = true;
|
||||
}
|
||||
currentCellIndex = 0;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (currentRowNode.isLeaf()) {
|
||||
discardCurrentRowNode(true);
|
||||
}
|
||||
while (!afterLast) {
|
||||
if (nubCellsRemain) {
|
||||
nubCellsRemain = false;
|
||||
}
|
||||
if (currentRowNode.hasMoreFanNodes()) {
|
||||
followNextFan();
|
||||
if (currentRowNode.hasOccurrences()) {
|
||||
currentCellIndex = 0;
|
||||
return true;
|
||||
}// found some values
|
||||
} else {
|
||||
discardCurrentRowNode(true);
|
||||
}
|
||||
}
|
||||
return false;// went past the end
|
||||
}
|
||||
|
||||
|
||||
/**************** secondary traversal methods ******************************/
|
||||
|
||||
protected void reInitFirstNode() {
|
||||
resetToBeforeFirstEntry();
|
||||
initFirstNode();
|
||||
}
|
||||
|
||||
protected void initFirstNode() {
|
||||
int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset();
|
||||
rowNodeStackIndex = 0;
|
||||
currentRowNode = rowNodes[0];
|
||||
currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure);
|
||||
appendCurrentTokenToRowBuffer();
|
||||
beforeFirst = false;
|
||||
}
|
||||
|
||||
protected void followFirstFan() {
|
||||
followFan(0);
|
||||
}
|
||||
|
||||
protected void followPreviousFan() {
|
||||
int nextFanPosition = currentRowNode.getFanIndex() - 1;
|
||||
followFan(nextFanPosition);
|
||||
}
|
||||
|
||||
protected void followCurrentFan() {
|
||||
int currentFanPosition = currentRowNode.getFanIndex();
|
||||
followFan(currentFanPosition);
|
||||
}
|
||||
|
||||
protected void followNextFan() {
|
||||
int nextFanPosition = currentRowNode.getFanIndex() + 1;
|
||||
followFan(nextFanPosition);
|
||||
}
|
||||
|
||||
protected void followLastFan() {
|
||||
followFan(currentRowNode.getLastFanIndex());
|
||||
}
|
||||
|
||||
protected void followFan(int fanIndex) {
|
||||
currentRowNode.setFanIndex(fanIndex);
|
||||
appendToRowBuffer(currentRowNode.getFanByte(fanIndex));
|
||||
|
||||
int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset()
|
||||
+ currentRowNode.getNextNodeOffset(fanIndex, blockMeta);
|
||||
++rowNodeStackIndex;
|
||||
|
||||
currentRowNode = rowNodes[rowNodeStackIndex];
|
||||
currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure);
|
||||
|
||||
//TODO getToken is spewing garbage
|
||||
appendCurrentTokenToRowBuffer();
|
||||
if (currentRowNode.isNub()) {
|
||||
nubCellsRemain = true;
|
||||
}
|
||||
currentCellIndex = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param forwards which marker to set if we overflow
|
||||
*/
|
||||
protected void discardCurrentRowNode(boolean forwards) {
|
||||
RowNodeReader rowNodeBeingPopped = currentRowNode;
|
||||
--rowNodeStackIndex;// pop it off the stack
|
||||
if (rowNodeStackIndex < 0) {
|
||||
currentRowNode = null;
|
||||
if (forwards) {
|
||||
markAfterLast();
|
||||
} else {
|
||||
markBeforeFirst();
|
||||
}
|
||||
return;
|
||||
}
|
||||
popFromRowBuffer(rowNodeBeingPopped);
|
||||
currentRowNode = rowNodes[rowNodeStackIndex];
|
||||
}
|
||||
|
||||
protected void markBeforeFirst() {
|
||||
beforeFirst = true;
|
||||
afterLast = false;
|
||||
currentRowNode = null;
|
||||
}
|
||||
|
||||
protected void markAfterLast() {
|
||||
beforeFirst = false;
|
||||
afterLast = true;
|
||||
currentRowNode = null;
|
||||
}
|
||||
|
||||
|
||||
/***************** helper methods **************************/
|
||||
|
||||
protected void appendCurrentTokenToRowBuffer() {
|
||||
System.arraycopy(block, currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength,
|
||||
currentRowNode.getTokenLength());
|
||||
rowLength += currentRowNode.getTokenLength();
|
||||
}
|
||||
|
||||
protected void appendToRowBuffer(byte b) {
|
||||
rowBuffer[rowLength] = b;
|
||||
++rowLength;
|
||||
}
|
||||
|
||||
protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) {
|
||||
rowLength -= rowNodeBeingPopped.getTokenLength();
|
||||
--rowLength; // pop the parent's fan byte
|
||||
}
|
||||
|
||||
protected boolean hasOccurrences() {
|
||||
return currentRowNode != null && currentRowNode.hasOccurrences();
|
||||
}
|
||||
|
||||
protected boolean isBranch() {
|
||||
return currentRowNode != null && !currentRowNode.hasOccurrences()
|
||||
&& currentRowNode.hasChildren();
|
||||
}
|
||||
|
||||
protected boolean isNub() {
|
||||
return currentRowNode != null && currentRowNode.hasOccurrences()
|
||||
&& currentRowNode.hasChildren();
|
||||
}
|
||||
|
||||
protected boolean isLeaf() {
|
||||
return currentRowNode != null && currentRowNode.hasOccurrences()
|
||||
&& !currentRowNode.hasChildren();
|
||||
}
|
||||
|
||||
//TODO expose this in a PrefixTreeScanner interface
|
||||
public boolean isBeforeFirst(){
|
||||
return beforeFirst;
|
||||
}
|
||||
|
||||
public boolean isAfterLast(){
|
||||
return afterLast;
|
||||
}
|
||||
|
||||
protected boolean isOutOfBounds(){
|
||||
return beforeFirst || afterLast;
|
||||
}
|
||||
|
||||
protected boolean isFirstCellInRow() {
|
||||
return currentCellIndex == 0;
|
||||
}
|
||||
|
||||
protected boolean isLastCellInRow() {
|
||||
return currentCellIndex == currentRowNode.getLastCellIndex();
|
||||
}
|
||||
|
||||
|
||||
/********************* fill in family/qualifier/ts/type/value ************/
|
||||
|
||||
protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) {
|
||||
populateNonRowFields(cellNum);
|
||||
return CellComparator.compareStatic(this, key);
|
||||
}
|
||||
|
||||
protected void populateFirstNonRowFields() {
|
||||
populateNonRowFields(0);
|
||||
}
|
||||
|
||||
protected void populatePreviousNonRowFields() {
|
||||
populateNonRowFields(currentCellIndex - 1);
|
||||
}
|
||||
|
||||
protected void populateLastNonRowFields() {
|
||||
populateNonRowFields(currentRowNode.getLastCellIndex());
|
||||
}
|
||||
|
||||
protected void populateNonRowFields(int cellIndex) {
|
||||
currentCellIndex = cellIndex;
|
||||
populateFamily();
|
||||
populateQualifier();
|
||||
populateTimestamp();
|
||||
populateMvccVersion();
|
||||
populateType();
|
||||
populateValueOffsets();
|
||||
}
|
||||
|
||||
protected void populateFamily() {
|
||||
int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta);
|
||||
familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset();
|
||||
familyLength = familyReader.getColumnLength();
|
||||
}
|
||||
|
||||
protected void populateQualifier() {
|
||||
int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta);
|
||||
qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset();
|
||||
qualifierLength = qualifierReader.getColumnLength();
|
||||
}
|
||||
|
||||
protected void populateTimestamp() {
|
||||
if (blockMeta.isAllSameTimestamp()) {
|
||||
timestamp = blockMeta.getMinTimestamp();
|
||||
} else {
|
||||
int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta);
|
||||
timestamp = timestampDecoder.getLong(timestampIndex);
|
||||
}
|
||||
}
|
||||
|
||||
protected void populateMvccVersion() {
|
||||
if (blockMeta.isAllSameMvccVersion()) {
|
||||
mvccVersion = blockMeta.getMinMvccVersion();
|
||||
} else {
|
||||
int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex,
|
||||
blockMeta);
|
||||
mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex);
|
||||
}
|
||||
}
|
||||
|
||||
protected void populateType() {
|
||||
int typeInt;
|
||||
if (blockMeta.isAllSameType()) {
|
||||
typeInt = blockMeta.getAllTypes();
|
||||
} else {
|
||||
typeInt = currentRowNode.getType(currentCellIndex, blockMeta);
|
||||
}
|
||||
type = PrefixTreeCell.TYPES[typeInt];
|
||||
}
|
||||
|
||||
protected void populateValueOffsets() {
|
||||
int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta);
|
||||
absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection;
|
||||
valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta);
|
||||
}
|
||||
|
||||
|
||||
/**************** getters ***************************/
|
||||
|
||||
public byte[] getTreeBytes() {
|
||||
return block;
|
||||
}
|
||||
|
||||
public PrefixTreeBlockMeta getBlockMeta() {
|
||||
return blockMeta;
|
||||
}
|
||||
|
||||
public int getMaxRowTreeStackNodes() {
|
||||
return rowNodes.length;
|
||||
}
|
||||
|
||||
public int getRowBufferLength() {
|
||||
return rowBuffer.length;
|
||||
}
|
||||
|
||||
public int getQualifierBufferLength() {
|
||||
return qualifierBuffer.length;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,402 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.Cell;
|
||||
import org.apache.hbase.cell.CellScannerPosition;
|
||||
import org.apache.hbase.cell.CellTool;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
|
||||
|
||||
import com.google.common.primitives.UnsignedBytes;
|
||||
|
||||
/**
|
||||
* Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
|
||||
* position itself on a requested Cell without scanning through cells before it. The PrefixTree is
|
||||
* set up to be a Trie of rows, so finding a particular row is extremely cheap.
|
||||
* <p/>
|
||||
* Once it finds the row, it does a binary search through the cells inside the row, which is not as
|
||||
* fast as the trie search, but faster than iterating through every cell like existing block formats
|
||||
* do. For this reason, this implementation is targeted towards schemas where rows are narrow enough
|
||||
* to have several or many per block, and where you are generally looking for the entire row or the
|
||||
* first cell. It will still be fast for wide rows or point queries, but could be improved upon.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
|
||||
CellSearcher {
|
||||
|
||||
/*************** construct ******************************/
|
||||
|
||||
public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
|
||||
int rowBufferLength, int qualifierBufferLength) {
|
||||
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
|
||||
}
|
||||
|
||||
|
||||
/********************* CellSearcher methods *******************/
|
||||
|
||||
@Override
|
||||
public boolean positionAt(Cell key) {
|
||||
return CellScannerPosition.AT == positionAtOrAfter(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CellScannerPosition positionAtOrBefore(Cell key) {
|
||||
reInitFirstNode();
|
||||
int fanIndex = -1;
|
||||
|
||||
while(true){
|
||||
//detect row mismatch. break loop if mismatch
|
||||
int currentNodeDepth = rowLength;
|
||||
int rowTokenComparison = compareToCurrentToken(key);
|
||||
if(rowTokenComparison != 0){
|
||||
return fixRowTokenMissReverse(rowTokenComparison);
|
||||
}
|
||||
|
||||
//exact row found, move on to qualifier & ts
|
||||
if(rowMatchesAfterCurrentPosition(key)){
|
||||
return positionAtQualifierTimestamp(key, true);
|
||||
}
|
||||
|
||||
//detect dead end (no fan to descend into)
|
||||
if(!currentRowNode.hasFan()){
|
||||
if(hasOccurrences()){//must be leaf or nub
|
||||
populateLastNonRowFields();
|
||||
return CellScannerPosition.BEFORE;
|
||||
}else{
|
||||
//TODO i don't think this case is exercised by any tests
|
||||
return fixRowFanMissReverse(0);
|
||||
}
|
||||
}
|
||||
|
||||
//keep hunting for the rest of the row
|
||||
byte searchForByte = CellTool.getRowByte(key, currentNodeDepth);
|
||||
fanIndex = currentRowNode.whichFanNode(searchForByte);
|
||||
if(fanIndex < 0){//no matching row. return early
|
||||
int insertionPoint = -fanIndex;
|
||||
return fixRowFanMissReverse(insertionPoint);
|
||||
}
|
||||
//found a match, so dig deeper into the tree
|
||||
followFan(fanIndex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical workflow to positionAtOrBefore, but kept as a separate method to avoid ~10 extra
|
||||
* if-statements. Priority is on readability and debuggability.
|
||||
*/
|
||||
@Override
|
||||
public CellScannerPosition positionAtOrAfter(Cell key) {
|
||||
reInitFirstNode();
|
||||
int fanIndex = -1;
|
||||
|
||||
while(true){
|
||||
//detect row mismatch. break loop if mismatch
|
||||
int currentNodeDepth = rowLength;
|
||||
int rowTokenComparison = compareToCurrentToken(key);
|
||||
if(rowTokenComparison != 0){
|
||||
return fixRowTokenMissForward(rowTokenComparison);
|
||||
}
|
||||
|
||||
//exact row found, move on to qualifier & ts
|
||||
if(rowMatchesAfterCurrentPosition(key)){
|
||||
return positionAtQualifierTimestamp(key, false);
|
||||
}
|
||||
|
||||
//detect dead end (no fan to descend into)
|
||||
if(!currentRowNode.hasFan()){
|
||||
if(hasOccurrences()){
|
||||
populateFirstNonRowFields();
|
||||
return CellScannerPosition.AFTER;
|
||||
}else{
|
||||
//TODO i don't think this case is exercised by any tests
|
||||
return fixRowFanMissForward(0);
|
||||
}
|
||||
}
|
||||
|
||||
//keep hunting for the rest of the row
|
||||
byte searchForByte = CellTool.getRowByte(key, currentNodeDepth);
|
||||
fanIndex = currentRowNode.whichFanNode(searchForByte);
|
||||
if(fanIndex < 0){//no matching row. return early
|
||||
int insertionPoint = -fanIndex;
|
||||
return fixRowFanMissForward(insertionPoint);
|
||||
}
|
||||
//found a match, so dig deeper into the tree
|
||||
followFan(fanIndex);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekForwardTo(Cell key) {
|
||||
if(currentPositionIsAfter(key)){
|
||||
//our position is after the requested key, so can't do anything
|
||||
return false;
|
||||
}
|
||||
return positionAt(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CellScannerPosition seekForwardToOrBefore(Cell key) {
|
||||
//Do we even need this check, or should upper layers avoid this situation? It's relatively
|
||||
//expensive compared to the rest of the seek operation.
|
||||
if(currentPositionIsAfter(key)){
|
||||
//our position is after the requested key, so can't do anything
|
||||
return CellScannerPosition.AFTER;
|
||||
}
|
||||
|
||||
return positionAtOrBefore(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CellScannerPosition seekForwardToOrAfter(Cell key) {
|
||||
//Do we even need this check, or should upper layers avoid this situation? It's relatively
|
||||
//expensive compared to the rest of the seek operation.
|
||||
if(currentPositionIsAfter(key)){
|
||||
//our position is after the requested key, so can't do anything
|
||||
return CellScannerPosition.AFTER;
|
||||
}
|
||||
|
||||
return positionAtOrAfter(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
|
||||
*/
|
||||
@Override
|
||||
public void positionAfterLastCell() {
|
||||
resetToBeforeFirstEntry();
|
||||
beforeFirst = false;
|
||||
afterLast = true;
|
||||
}
|
||||
|
||||
|
||||
/***************** Object methods ***************************/
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
//trivial override to confirm intent (findbugs)
|
||||
return super.equals(obj);
|
||||
}
|
||||
|
||||
|
||||
/****************** internal methods ************************/
|
||||
|
||||
protected boolean currentPositionIsAfter(Cell cell){
|
||||
return compareTo(cell) > 0;
|
||||
}
|
||||
|
||||
protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
|
||||
int minIndex = 0;
|
||||
int maxIndex = currentRowNode.getLastCellIndex();
|
||||
int diff;
|
||||
while (true) {
|
||||
int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
|
||||
diff = populateNonRowFieldsAndCompareTo(midIndex, key);
|
||||
|
||||
if (diff == 0) {// found exact match
|
||||
return CellScannerPosition.AT;
|
||||
} else if (minIndex == maxIndex) {// even termination case
|
||||
break;
|
||||
} else if ((minIndex + 1) == maxIndex) {// odd termination case
|
||||
diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
|
||||
if(diff > 0){
|
||||
diff = populateNonRowFieldsAndCompareTo(minIndex, key);
|
||||
}
|
||||
break;
|
||||
} else if (diff < 0) {// keep going forward
|
||||
minIndex = currentCellIndex;
|
||||
} else {// went past it, back up
|
||||
maxIndex = currentCellIndex;
|
||||
}
|
||||
}
|
||||
|
||||
if (diff == 0) {
|
||||
return CellScannerPosition.AT;
|
||||
|
||||
} else if (diff < 0) {// we are before key
|
||||
if (beforeOnMiss) {
|
||||
return CellScannerPosition.BEFORE;
|
||||
}
|
||||
if (next()) {
|
||||
return CellScannerPosition.AFTER;
|
||||
}
|
||||
return CellScannerPosition.AFTER_LAST;
|
||||
|
||||
} else {// we are after key
|
||||
if (!beforeOnMiss) {
|
||||
return CellScannerPosition.AFTER;
|
||||
}
|
||||
if (previous()) {
|
||||
return CellScannerPosition.BEFORE;
|
||||
}
|
||||
return CellScannerPosition.BEFORE_FIRST;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* compare this.row to key.row but starting at the current rowLength
|
||||
* @param key Cell being searched for
|
||||
* @return true if row buffer contents match key.row
|
||||
*/
|
||||
protected boolean rowMatchesAfterCurrentPosition(Cell key) {
|
||||
if (!currentRowNode.hasOccurrences()) {
|
||||
return false;
|
||||
}
|
||||
int thatRowLength = key.getRowLength();
|
||||
if (rowLength != thatRowLength) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO move part of this to Cell comparator?
|
||||
/**
|
||||
* Compare only the bytes within the window of the current token
|
||||
* @param key the Cell being searched for
|
||||
* @return -1 if key is less than (before) this, 0 if equal, and 1 if key is after
|
||||
*/
|
||||
protected int compareToCurrentToken(Cell key) {
|
||||
int startIndex = rowLength - currentRowNode.getTokenLength();
|
||||
int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
|
||||
for (int i = startIndex; i < endIndexExclusive; ++i) {
|
||||
if (i >= key.getRowLength()) {// key was shorter, so it's first
|
||||
return -1;
|
||||
}
|
||||
byte keyByte = CellTool.getRowByte(key, i);
|
||||
byte thisByte = rowBuffer[i];
|
||||
if (keyByte == thisByte) {
|
||||
continue;
|
||||
}
|
||||
return UnsignedBytes.compare(keyByte, thisByte);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
protected void followLastFansUntilExhausted(){
|
||||
while(currentRowNode.hasFan()){
|
||||
followLastFan();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************** complete seek when token mismatch ******************/
|
||||
|
||||
/**
|
||||
* @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
|
||||
* >0: input key is after the searcher's position
|
||||
*/
|
||||
protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
|
||||
if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
|
||||
boolean foundPreviousRow = previousRow(true);
|
||||
if(foundPreviousRow){
|
||||
populateLastNonRowFields();
|
||||
return CellScannerPosition.BEFORE;
|
||||
}else{
|
||||
return CellScannerPosition.BEFORE_FIRST;
|
||||
}
|
||||
|
||||
}else{//searcher position is before the input key
|
||||
if(currentRowNode.hasOccurrences()){
|
||||
populateFirstNonRowFields();
|
||||
return CellScannerPosition.BEFORE;
|
||||
}
|
||||
boolean foundNextRow = nextRow();
|
||||
if(foundNextRow){
|
||||
return CellScannerPosition.AFTER;
|
||||
}else{
|
||||
return CellScannerPosition.AFTER_LAST;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
|
||||
* >0: input key is after the searcher's position
|
||||
*/
|
||||
protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
|
||||
if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
|
||||
if(currentRowNode.hasOccurrences()){
|
||||
populateFirstNonRowFields();
|
||||
return CellScannerPosition.AFTER;
|
||||
}
|
||||
boolean foundNextRow = nextRow();
|
||||
if(foundNextRow){
|
||||
return CellScannerPosition.AFTER;
|
||||
}else{
|
||||
return CellScannerPosition.AFTER_LAST;
|
||||
}
|
||||
|
||||
}else{//searcher position is before the input key, so go forward
|
||||
discardCurrentRowNode(true);
|
||||
boolean foundNextRow = nextRow();
|
||||
if(foundNextRow){
|
||||
return CellScannerPosition.AFTER;
|
||||
}else{
|
||||
return CellScannerPosition.AFTER_LAST;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************** complete seek when fan mismatch ******************/
|
||||
|
||||
protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
|
||||
if(fanInsertionPoint == 0){//we need to back up a row
|
||||
boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
|
||||
if(foundPreviousRow){
|
||||
populateLastNonRowFields();
|
||||
return CellScannerPosition.BEFORE;
|
||||
}
|
||||
return CellScannerPosition.BEFORE_FIRST;
|
||||
}
|
||||
|
||||
//follow the previous fan, but then descend recursively forward
|
||||
followFan(fanInsertionPoint - 1);
|
||||
followLastFansUntilExhausted();
|
||||
populateLastNonRowFields();
|
||||
return CellScannerPosition.BEFORE;
|
||||
}
|
||||
|
||||
protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
|
||||
if(fanInsertionPoint >= currentRowNode.getFanOut()){
|
||||
discardCurrentRowNode(true);
|
||||
if (!nextRow()) {
|
||||
return CellScannerPosition.AFTER_LAST;
|
||||
} else {
|
||||
return CellScannerPosition.AFTER;
|
||||
}
|
||||
}
|
||||
|
||||
followFan(fanInsertionPoint);
|
||||
if(hasOccurrences()){
|
||||
populateFirstNonRowFields();
|
||||
return CellScannerPosition.AFTER;
|
||||
}
|
||||
|
||||
if(nextRowInternal()){
|
||||
populateFirstNonRowFields();
|
||||
return CellScannerPosition.AFTER;
|
||||
|
||||
}else{
|
||||
return CellScannerPosition.AFTER_LAST;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
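A sketch of how a caller might interpret the CellScannerPosition results above (hypothetical searcher and keyCell):

CellScannerPosition pos = searcher.seekForwardToOrAfter(keyCell);
if (pos == CellScannerPosition.AT) {
  // exact match: getCurrent() is the requested cell
} else if (pos == CellScannerPosition.AFTER) {
  // getCurrent() is a cell sorting after the requested key
} else if (pos == CellScannerPosition.AFTER_LAST) {
  // the requested key sorts after every cell in this block
}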
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueTool;
|
||||
import org.apache.hbase.Cell;
|
||||
import org.apache.hbase.cell.CellComparator;
|
||||
|
||||
/**
|
||||
* As the PrefixTreeArrayScanner moves through the tree bytes, it changes the values in the fields
|
||||
* of this class so that Cell logic can be applied, but without allocating new memory for every Cell
|
||||
* iterated through.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeCell implements Cell, Comparable<Cell> {
|
||||
|
||||
/********************** static **********************/
|
||||
|
||||
public static final KeyValue.Type[] TYPES = new KeyValue.Type[256];
|
||||
static {
|
||||
for (KeyValue.Type type : KeyValue.Type.values()) {
|
||||
TYPES[type.getCode() & 0xff] = type;
|
||||
}
|
||||
}
|
||||
|
||||
//Same as KeyValue constructor. Only used to avoid NPEs when the full cell hasn't been initialized.
|
||||
public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put;
|
||||
|
||||
/******************** fields ************************/
|
||||
|
||||
protected byte[] block;
|
||||
//we could also avoid setting the mvccVersion in the scanner/searcher, but this is simpler
|
||||
protected boolean includeMvccVersion;
|
||||
|
||||
protected byte[] rowBuffer;
|
||||
protected int rowLength;
|
||||
|
||||
protected byte[] familyBuffer;
|
||||
protected int familyOffset;
|
||||
protected int familyLength;
|
||||
|
||||
protected byte[] qualifierBuffer;// aligned to the end of the array
|
||||
protected int qualifierOffset;
|
||||
protected int qualifierLength;
|
||||
|
||||
protected Long timestamp;
|
||||
protected Long mvccVersion;
|
||||
|
||||
protected KeyValue.Type type;
|
||||
|
||||
protected int absoluteValueOffset;
|
||||
protected int valueLength;
|
||||
|
||||
|
||||
/********************** Cell methods ******************/
|
||||
|
||||
/**
|
||||
* For debugging. Currently creates new KeyValue to utilize its toString() method.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return getKeyValueString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (!(obj instanceof Cell)) {
|
||||
return false;
|
||||
}
|
||||
//Temporary hack to maintain backwards compatibility with KeyValue.equals
|
||||
return CellComparator.equalsIgnoreMvccVersion(this, (Cell)obj);
|
||||
|
||||
//TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode(){
|
||||
//Temporary hack to maintain backwards compatibility with KeyValue.hashCode
|
||||
//I don't think this is used in any hot code paths
|
||||
return KeyValueTool.copyToNewKeyValue(this).hashCode();
|
||||
|
||||
//TODO return CellComparator.hashCode(this);//see HBASE-6907
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Cell other) {
|
||||
return CellComparator.compareStatic(this, other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMvccVersion() {
|
||||
if (!includeMvccVersion) {
|
||||
return 0L;
|
||||
}
|
||||
return mvccVersion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueLength() {
|
||||
return valueLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getRowArray() {
|
||||
return rowBuffer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getRowOffset() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public short getRowLength() {
|
||||
return (short) rowLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getFamilyArray() {
|
||||
return familyBuffer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFamilyOffset() {
|
||||
return familyOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getFamilyLength() {
|
||||
return (byte) familyLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getQualifierArray() {
|
||||
return qualifierBuffer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getQualifierOffset() {
|
||||
return qualifierOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getQualifierLength() {
|
||||
return qualifierLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getValueArray() {
|
||||
return block;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueOffset() {
|
||||
return absoluteValueOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getTypeByte() {
|
||||
return type.getCode();
|
||||
}
|
||||
|
||||
|
||||
/************************* helper methods *************************/
|
||||
|
||||
/**
|
||||
* Need this separate method so we can call it from subclasses' toString() methods
|
||||
*/
|
||||
protected String getKeyValueString(){
|
||||
KeyValue kv = KeyValueTool.copyToNewKeyValue(this);
|
||||
return kv.toString();
|
||||
}
|
||||
|
||||
}
|
|
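Because the scanner mutates this single PrefixTreeCell in place, a caller that needs to keep a cell across calls to next() should copy it first; a one-line sketch (hypothetical scanner variable):

KeyValue retained = KeyValueTool.copyToNewKeyValue(scanner.getCurrent());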
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode.column;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
import org.apache.hbase.util.vint.UVIntTool;
|
||||
|
||||
@InterfaceAudience.Private
|
||||
public class ColumnNodeReader {
|
||||
|
||||
/**************** fields ************************/
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
protected byte[] block;
|
||||
|
||||
protected byte[] columnBuffer;
|
||||
protected boolean familyVsQualifier;
|
||||
|
||||
protected int offsetIntoBlock;
|
||||
|
||||
protected int tokenOffsetIntoBlock;
|
||||
protected int tokenLength;
|
||||
protected int parentStartPosition;
|
||||
|
||||
|
||||
/************** construct *************************/
|
||||
|
||||
public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) {
|
||||
this.columnBuffer = columnBuffer;
|
||||
this.familyVsQualifier = familyVsQualifier;
|
||||
}
|
||||
|
||||
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
|
||||
this.blockMeta = blockMeta;
|
||||
this.block = block;
|
||||
}
|
||||
|
||||
|
||||
/************* methods *****************************/
|
||||
|
||||
public void positionAt(int offsetIntoBlock) {
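// column node layout (mirrors ColumnNodeWriter): [tokenLength (UVInt)][token bytes][parent start position (fixed-width UFInt)]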
|
||||
this.offsetIntoBlock = offsetIntoBlock;
|
||||
tokenLength = UVIntTool.getInt(block, offsetIntoBlock);
|
||||
tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
|
||||
int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength;
|
||||
int offsetWidth;
|
||||
if (familyVsQualifier) {
|
||||
offsetWidth = blockMeta.getFamilyOffsetWidth();
|
||||
} else {
|
||||
offsetWidth = blockMeta.getQualifierOffsetWidth();
|
||||
}
|
||||
parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth);
|
||||
}
|
||||
|
||||
public void prependTokenToBuffer(int bufferStartIndex) {
|
||||
System.arraycopy(block, tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength);
|
||||
}
|
||||
|
||||
public boolean isRoot() {
|
||||
if (familyVsQualifier) {
|
||||
return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset();
|
||||
} else {
|
||||
return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/************** standard methods *********************/
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return super.toString() + "[" + offsetIntoBlock + "]";
|
||||
}
|
||||
|
||||
|
||||
/****************** get/set ****************************/
|
||||
|
||||
public int getTokenLength() {
|
||||
return tokenLength;
|
||||
}
|
||||
|
||||
public int getParentStartPosition() {
|
||||
return parentStartPosition;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode.column;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
|
||||
/**
|
||||
* Position one of these appropriately in the data block and you can call its methods to retrieve
|
||||
* the family or qualifier at the current position.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ColumnReader {
|
||||
|
||||
/****************** fields *************************/
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
|
||||
protected byte[] columnBuffer;
|
||||
protected int columnOffset;
|
||||
protected int columnLength;
|
||||
protected boolean familyVsQualifier;
|
||||
|
||||
protected ColumnNodeReader columnNodeReader;
|
||||
|
||||
|
||||
/******************** construct *******************/
|
||||
|
||||
public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) {
|
||||
this.columnBuffer = columnBuffer;
|
||||
this.familyVsQualifier = familyVsQualifier;
|
||||
this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier);
|
||||
}
|
||||
|
||||
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
|
||||
this.blockMeta = blockMeta;
|
||||
clearColumnBuffer();
|
||||
columnNodeReader.initOnBlock(blockMeta, block);
|
||||
}
|
||||
|
||||
|
||||
/********************* methods *******************/
|
||||
|
||||
public ColumnReader populateBuffer(int offsetIntoColumnData) {
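// walk from the given node up through its parents, prepending each token so the full column
// name is assembled right-to-left into the tail end of columnBuffer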
|
||||
clearColumnBuffer();
|
||||
int nextRelativeOffset = offsetIntoColumnData;
|
||||
while (true) {
|
||||
int absoluteOffset;
|
||||
if (familyVsQualifier) {
|
||||
absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset;
|
||||
} else {
|
||||
absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset;
|
||||
}
|
||||
columnNodeReader.positionAt(absoluteOffset);
|
||||
columnOffset -= columnNodeReader.getTokenLength();
|
||||
columnLength += columnNodeReader.getTokenLength();
|
||||
columnNodeReader.prependTokenToBuffer(columnOffset);
|
||||
if (columnNodeReader.isRoot()) {
|
||||
return this;
|
||||
}
|
||||
nextRelativeOffset = columnNodeReader.getParentStartPosition();
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] copyBufferToNewArray() {// for testing
|
||||
byte[] out = new byte[columnLength];
|
||||
System.arraycopy(columnBuffer, columnOffset, out, 0, out.length);
|
||||
return out;
|
||||
}
|
||||
|
||||
public int getColumnLength() {
|
||||
return columnLength;
|
||||
}
|
||||
|
||||
public void clearColumnBuffer() {
|
||||
columnOffset = columnBuffer.length;
|
||||
columnLength = 0;
|
||||
}
|
||||
|
||||
|
||||
/****************************** get/set *************************************/
|
||||
|
||||
public int getColumnOffset() {
|
||||
return columnOffset;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,267 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode.row;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.ByteRange;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
import org.apache.hbase.util.vint.UVIntTool;
|
||||
|
||||
/**
|
||||
* Position one of these appropriately in the data block and you can call its methods to retrieve
|
||||
* information necessary to decode the cells in the row.
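* <p/>
* As parsed by initOnBlock, each serialized row node is laid out as: [tokenLength (UVInt)]
* [token bytes][fanOut (UVInt)][fan bytes][numCells (UVInt)] followed by the per-cell
* fixed-width sections: family offsets, qualifier offsets, timestamp indexes, mvccVersion
* indexes, operation types, value offsets, value lengths, and next-node offsets.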
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class RowNodeReader {
|
||||
|
||||
/************* fields ***********************************/
|
||||
|
||||
protected byte[] block;
|
||||
protected int offset;
|
||||
protected int fanIndex;
|
||||
|
||||
protected int numCells;
|
||||
|
||||
protected int tokenOffset;
|
||||
protected int tokenLength;
|
||||
protected int fanOffset;
|
||||
protected int fanOut;
|
||||
|
||||
protected int familyOffsetsOffset;
|
||||
protected int qualifierOffsetsOffset;
|
||||
protected int timestampIndexesOffset;
|
||||
protected int mvccVersionIndexesOffset;
|
||||
protected int operationTypesOffset;
|
||||
protected int valueOffsetsOffset;
|
||||
protected int valueLengthsOffset;
|
||||
protected int nextNodeOffsetsOffset;
|
||||
|
||||
|
||||
/******************* construct **************************/
|
||||
|
||||
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) {
|
||||
this.block = block;
|
||||
|
||||
this.offset = offset;
|
||||
resetFanIndex();
|
||||
|
||||
this.tokenLength = UVIntTool.getInt(block, offset);
|
||||
this.tokenOffset = offset + UVIntTool.numBytes(tokenLength);
|
||||
|
||||
this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength);
|
||||
this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut);
|
||||
|
||||
this.numCells = UVIntTool.getInt(block, fanOffset + fanOut);
|
||||
|
||||
this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells);
|
||||
this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth();
|
||||
this.timestampIndexesOffset = qualifierOffsetsOffset + numCells
|
||||
* blockMeta.getQualifierOffsetWidth();
|
||||
this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells
|
||||
* blockMeta.getTimestampIndexWidth();
|
||||
this.operationTypesOffset = mvccVersionIndexesOffset + numCells
|
||||
* blockMeta.getMvccVersionIndexWidth();
|
||||
this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth();
|
||||
this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth();
|
||||
this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth();
|
||||
}
|
||||
|
||||
|
||||
/******************** methods ****************************/
|
||||
|
||||
public boolean isLeaf() {
|
||||
return fanOut == 0;
|
||||
}
|
||||
|
||||
public boolean isNub() {
|
||||
return fanOut > 0 && numCells > 0;
|
||||
}
|
||||
|
||||
public boolean isBranch() {
|
||||
return fanOut > 0 && numCells == 0;
|
||||
}
|
||||
|
||||
public boolean hasOccurrences() {
|
||||
return numCells > 0;
|
||||
}
|
||||
|
||||
public int getTokenArrayOffset(){
|
||||
return tokenOffset;
|
||||
}
|
||||
|
||||
public int getTokenLength() {
|
||||
return tokenLength;
|
||||
}
|
||||
|
||||
public byte getFanByte(int i) {
|
||||
return block[fanOffset + i];
|
||||
}
|
||||
|
||||
/**
|
||||
* for debugging
|
||||
*/
|
||||
protected String getFanByteReadable(int i){
|
||||
return Bytes.toStringBinary(block, fanOffset + i, 1);
|
||||
}
|
||||
|
||||
public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) {
|
||||
int fIntWidth = blockMeta.getFamilyOffsetWidth();
|
||||
int startIndex = familyOffsetsOffset + fIntWidth * index;
|
||||
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
|
||||
}
|
||||
|
||||
public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) {
|
||||
int fIntWidth = blockMeta.getQualifierOffsetWidth();
|
||||
int startIndex = qualifierOffsetsOffset + fIntWidth * index;
|
||||
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
|
||||
}
|
||||
|
||||
public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) {
|
||||
int fIntWidth = blockMeta.getTimestampIndexWidth();
|
||||
int startIndex = timestampIndexesOffset + fIntWidth * index;
|
||||
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
|
||||
}
|
||||
|
||||
public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) {
|
||||
int fIntWidth = blockMeta.getMvccVersionIndexWidth();
|
||||
int startIndex = mvccVersionIndexesOffset + fIntWidth * index;
|
||||
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
|
||||
}
|
||||
|
||||
public int getType(int index, PrefixTreeBlockMeta blockMeta) {
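// when every cell in the block shares the same type, no per-cell type bytes are stored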
|
||||
if (blockMeta.isAllSameType()) {
|
||||
return blockMeta.getAllTypes();
|
||||
}
|
||||
return block[operationTypesOffset + index];
|
||||
}
|
||||
|
||||
public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) {
|
||||
int fIntWidth = blockMeta.getValueOffsetWidth();
|
||||
int startIndex = valueOffsetsOffset + fIntWidth * index;
|
||||
int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
|
||||
return offset;
|
||||
}
|
||||
|
||||
public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) {
|
||||
int fIntWidth = blockMeta.getValueLengthWidth();
|
||||
int startIndex = valueLengthsOffset + fIntWidth * index;
|
||||
int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
|
||||
return length;
|
||||
}
|
||||
|
||||
public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) {
|
||||
int fIntWidth = blockMeta.getNextNodeOffsetWidth();
|
||||
int startIndex = nextNodeOffsetsOffset + fIntWidth * index;
|
||||
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
|
||||
}
|
||||
|
||||
public String getBranchNubLeafIndicator() {
|
||||
if (isNub()) {
|
||||
return "N";
|
||||
}
|
||||
return isBranch() ? "B" : "L";
|
||||
}
|
||||
|
||||
public boolean hasChildren() {
|
||||
return fanOut > 0;
|
||||
}
|
||||
|
||||
public int getLastFanIndex() {
|
||||
return fanOut - 1;
|
||||
}
|
||||
|
||||
public int getLastCellIndex() {
|
||||
return numCells - 1;
|
||||
}
|
||||
|
||||
public int getNumCells() {
|
||||
return numCells;
|
||||
}
|
||||
|
||||
public int getFanOut() {
|
||||
return fanOut;
|
||||
}
|
||||
|
||||
public byte[] getToken() {
|
||||
// TODO pass in reusable ByteRange
|
||||
return new ByteRange(block, tokenOffset, tokenLength).deepCopyToNewArray();
|
||||
}
|
||||
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
public int whichFanNode(byte searchForByte) {
|
||||
if( ! hasFan()){
|
||||
throw new IllegalStateException("This row node has no fan, so can't search it");
|
||||
}
|
||||
int fanIndexInBlock = Bytes.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut,
|
||||
searchForByte);
|
||||
if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block
|
||||
return fanIndexInBlock - fanOffset;
|
||||
}
|
||||
return fanIndexInBlock + fanOffset + 1;// didn't find it, so compensate in reverse
|
||||
}
|
||||
|
||||
public void resetFanIndex() {
|
||||
fanIndex = -1;// just the way the logic currently works
|
||||
}
|
||||
|
||||
public int getFanIndex() {
|
||||
return fanIndex;
|
||||
}
|
||||
|
||||
public void setFanIndex(int fanIndex) {
|
||||
this.fanIndex = fanIndex;
|
||||
}
|
||||
|
||||
public boolean hasFan(){
|
||||
return fanOut > 0;
|
||||
}
|
||||
|
||||
public boolean hasPreviousFanNodes() {
|
||||
return fanOut > 0 && fanIndex > 0;
|
||||
}
|
||||
|
||||
public boolean hasMoreFanNodes() {
|
||||
return fanIndex < getLastFanIndex();
|
||||
}
|
||||
|
||||
public boolean isOnLastFanNode() {
|
||||
return !hasMoreFanNodes();
|
||||
}
|
||||
|
||||
|
||||
/*************** standard methods **************************/
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("fan:" + Bytes.toStringBinary(block, fanOffset, fanOut));
|
||||
sb.append(",token:" + Bytes.toStringBinary(block, tokenOffset, tokenLength));
|
||||
sb.append(",numCells:" + numCells);
|
||||
sb.append(",fanIndex:"+fanIndex);
|
||||
if(fanIndex>=0){
|
||||
sb.append("("+getFanByteReadable(fanIndex)+")");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode.timestamp;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
|
||||
/**
|
||||
* Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class MvccVersionDecoder {
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
protected byte[] block;
|
||||
|
||||
|
||||
/************** construct ***********************/
|
||||
|
||||
public MvccVersionDecoder() {
|
||||
}
|
||||
|
||||
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
|
||||
this.block = block;
|
||||
this.blockMeta = blockMeta;
|
||||
}
|
||||
|
||||
|
||||
/************** methods *************************/
|
||||
|
||||
public long getMvccVersion(int index) {
|
||||
if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical
|
||||
return blockMeta.getMinMvccVersion();
|
||||
}
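// otherwise each mvccVersion is stored as a fixed-width delta above the block's minimum mvccVersion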
|
||||
int startIndex = blockMeta.getAbsoluteMvccVersionOffset()
|
||||
+ blockMeta.getMvccVersionDeltaWidth() * index;
|
||||
long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth());
|
||||
return blockMeta.getMinMvccVersion() + delta;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.decode.timestamp;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
|
||||
/**
|
||||
* Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class TimestampDecoder {
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
protected byte[] block;
|
||||
|
||||
|
||||
/************** construct ***********************/
|
||||
|
||||
public TimestampDecoder() {
|
||||
}
|
||||
|
||||
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
|
||||
this.block = block;
|
||||
this.blockMeta = blockMeta;
|
||||
}
|
||||
|
||||
|
||||
/************** methods *************************/
|
||||
|
||||
public long getLong(int index) {
|
||||
if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical
|
||||
return blockMeta.getMinTimestamp();
|
||||
}
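// otherwise each timestamp is stored as a fixed-width delta above the block's minimum timestamp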
|
||||
int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth()
|
||||
* index;
|
||||
long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth());
|
||||
return blockMeta.getMinTimestamp() + delta;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode;
|
||||
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Retrieve PrefixTreeEncoders from this factory which handles pooling them and preparing the
|
||||
* ones retrieved from the pool for usage.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class EncoderFactory {
|
||||
|
||||
private static final EncoderPool POOL = new ThreadLocalEncoderPool();
|
||||
|
||||
|
||||
public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
|
||||
return POOL.checkOut(outputStream, includeMvccVersion);
|
||||
}
|
||||
|
||||
public static void checkIn(PrefixTreeEncoder encoder) {
|
||||
POOL.checkIn(encoder);
|
||||
}
|
||||
|
||||
|
||||
/**************************** helper ******************************/
|
||||
|
||||
protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder,
|
||||
OutputStream outputStream, boolean includeMvccVersion) {
|
||||
PrefixTreeEncoder ret = encoder;
|
||||
if (encoder == null) {
|
||||
ret = new PrefixTreeEncoder(outputStream, includeMvccVersion);
|
||||
}
|
||||
ret.reset(outputStream, includeMvccVersion);
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode;
|
||||
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
|
||||
@InterfaceAudience.Private
|
||||
public interface EncoderPool {
|
||||
|
||||
PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion);
|
||||
void checkIn(PrefixTreeEncoder encoder);
|
||||
|
||||
}
|
|
@ -0,0 +1,494 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.KeyValueTool;
|
||||
import org.apache.hadoop.hbase.util.ArrayUtils;
|
||||
import org.apache.hadoop.hbase.util.ByteRange;
|
||||
import org.apache.hadoop.io.WritableUtils;
|
||||
import org.apache.hbase.Cell;
|
||||
import org.apache.hbase.cell.CellOutputStream;
|
||||
import org.apache.hbase.cell.CellTool;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
|
||||
import org.apache.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
|
||||
import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
|
||||
import org.apache.hbase.codec.prefixtree.encode.row.RowSectionWriter;
|
||||
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
|
||||
import org.apache.hbase.util.byterange.ByteRangeSet;
|
||||
import org.apache.hbase.util.byterange.impl.ByteRangeHashSet;
|
||||
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
|
||||
/**
|
||||
* This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are
|
||||
* added they are completely copied into the various encoding structures. This is important because
|
||||
* usually the cells being fed in during compactions will be transient.<br/>
|
||||
* <br/>
|
||||
* Usage:<br/>
|
||||
* 1) constructor<br/>
|
||||
* 2) append cells in sorted order: write(Cell cell)<br/>
|
||||
* 3) flush()<br/>
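* <br/>
* A minimal usage sketch (hypothetical variable names; assumes the Cells arrive already
* sorted):<br/>
* <pre>
* PrefixTreeEncoder encoder = EncoderFactory.checkOut(outputStream, includeMvccVersion);
* for (Cell cell : sortedCells) {
*   encoder.write(cell);
* }
* encoder.flush();
* EncoderFactory.checkIn(encoder);
* </pre>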
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeEncoder implements CellOutputStream {
|
||||
|
||||
/**************** static ************************/
|
||||
|
||||
protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class);
|
||||
|
||||
//future-proof for when HBase supports multiple families in a data block.
|
||||
public static final boolean MULITPLE_FAMILIES_POSSIBLE = false;
|
||||
|
||||
private static final boolean USE_HASH_COLUMN_SORTER = true;
|
||||
private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256;
|
||||
private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024;
|
||||
|
||||
|
||||
/**************** fields *************************/
|
||||
|
||||
protected long numResets = 0L;
|
||||
|
||||
protected OutputStream outputStream;
|
||||
|
||||
/*
|
||||
* Cannot change during a single block's encoding. If false, then substitute incoming Cell's
|
||||
* mvccVersion with zero and write out the block as usual.
|
||||
*/
|
||||
protected boolean includeMvccVersion;
|
||||
|
||||
/*
|
||||
* reusable ByteRanges used for communicating with the sorters/compilers
|
||||
*/
|
||||
protected ByteRange rowRange;
|
||||
protected ByteRange familyRange;
|
||||
protected ByteRange qualifierRange;
|
||||
|
||||
/*
|
||||
* incoming Cell fields are copied into these arrays
|
||||
*/
|
||||
protected long[] timestamps;
|
||||
protected long[] mvccVersions;
|
||||
protected byte[] typeBytes;
|
||||
protected int[] valueOffsets;
|
||||
protected byte[] values;
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
|
||||
/*
|
||||
* Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and
|
||||
* compile before flushing.
|
||||
*/
|
||||
protected LongEncoder timestampEncoder;
|
||||
protected LongEncoder mvccVersionEncoder;
|
||||
protected CellTypeEncoder cellTypeEncoder;
|
||||
|
||||
/*
|
||||
* Structures used for collecting families and qualifiers, de-duplicating them, and sorting them
|
||||
* so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by
|
||||
* comparing only with the previous row key, families and qualifiers can arrive in unsorted order
|
||||
* in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them.
|
||||
*/
|
||||
protected ByteRangeSet familyDeduplicator;
|
||||
protected ByteRangeSet qualifierDeduplicator;
|
||||
|
||||
/*
|
||||
* Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
|
||||
* trie structure with nodes connected by memory pointers (not serializable yet).
|
||||
*/
|
||||
protected Tokenizer rowTokenizer;
|
||||
protected Tokenizer familyTokenizer;
|
||||
protected Tokenizer qualifierTokenizer;
|
||||
|
||||
/*
|
||||
* Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
|
||||
* all information to an output stream of bytes that can be stored on disk.
|
||||
*/
|
||||
protected RowSectionWriter rowWriter;
|
||||
protected ColumnSectionWriter familyWriter;
|
||||
protected ColumnSectionWriter qualifierWriter;
|
||||
|
||||
/*
|
||||
* Integers used for counting cells and bytes. We keep track of the size of the Cells as if they
|
||||
* were full KeyValues because some parts of HBase like to know the "unencoded size".
|
||||
*/
|
||||
protected int totalCells = 0;
|
||||
protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
|
||||
protected int totalValueBytes = 0;
|
||||
protected int maxValueLength = 0;
|
||||
protected int totalBytes = 0;
|
||||
|
||||
|
||||
/***************** construct ***********************/
|
||||
|
||||
public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) {
|
||||
// used during cell accumulation
|
||||
this.blockMeta = new PrefixTreeBlockMeta();
|
||||
this.rowRange = new ByteRange();
|
||||
this.familyRange = new ByteRange();
|
||||
this.qualifierRange = new ByteRange();
|
||||
this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.values = new byte[VALUE_BUFFER_INIT_SIZE];
|
||||
|
||||
// used during compilation
|
||||
this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
|
||||
: new ByteRangeTreeSet();
|
||||
this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
|
||||
: new ByteRangeTreeSet();
|
||||
this.timestampEncoder = new LongEncoder();
|
||||
this.mvccVersionEncoder = new LongEncoder();
|
||||
this.cellTypeEncoder = new CellTypeEncoder();
|
||||
this.rowTokenizer = new Tokenizer();
|
||||
this.familyTokenizer = new Tokenizer();
|
||||
this.qualifierTokenizer = new Tokenizer();
|
||||
this.rowWriter = new RowSectionWriter();
|
||||
this.familyWriter = new ColumnSectionWriter();
|
||||
this.qualifierWriter = new ColumnSectionWriter();
|
||||
|
||||
reset(outputStream, includeMvccVersion);
|
||||
}
|
||||
|
||||
public void reset(OutputStream outputStream, boolean includeMvccVersion) {
|
||||
++numResets;
|
||||
this.includeMvccVersion = includeMvccVersion;
|
||||
this.outputStream = outputStream;
|
||||
valueOffsets[0] = 0;
|
||||
|
||||
familyDeduplicator.reset();
|
||||
qualifierDeduplicator.reset();
|
||||
rowTokenizer.reset();
|
||||
timestampEncoder.reset();
|
||||
mvccVersionEncoder.reset();
|
||||
cellTypeEncoder.reset();
|
||||
familyTokenizer.reset();
|
||||
qualifierTokenizer.reset();
|
||||
rowWriter.reset();
|
||||
familyWriter.reset();
|
||||
qualifierWriter.reset();
|
||||
|
||||
totalCells = 0;
|
||||
totalUnencodedBytes = 0;
|
||||
totalValueBytes = 0;
|
||||
maxValueLength = 0;
|
||||
totalBytes = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the arrays used to hold cell fragments are large enough for the cell that is being
|
||||
* added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
|
||||
* first few block encodings but should stabilize quickly.
|
||||
*/
|
||||
protected void ensurePerCellCapacities() {
|
||||
int currentCapacity = valueOffsets.length;
|
||||
int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe
|
||||
if (neededCapacity < currentCapacity) {
|
||||
return;
|
||||
}
|
||||
|
||||
int padding = neededCapacity;//this will double the array size
|
||||
timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding);
|
||||
mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding);
|
||||
typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding);
|
||||
valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding);
|
||||
}
|
||||
|
||||
/******************** CellOutputStream methods *************************/
|
||||
|
||||
/**
|
||||
* Note: Unused until support is added to the scanner/heap
|
||||
* <p/>
|
||||
* The following methods are optimized versions of write(Cell cell). The result should be
|
||||
* identical, however the implementation may be able to execute them much more efficiently because
|
||||
* it does not need to compare the unchanged fields with the previous cell's.
|
||||
* <p/>
|
||||
* Consider the benefits during compaction when paired with a CellScanner that is also aware of
|
||||
* row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells
|
||||
* to the write(Cell cell) method.
|
||||
* <p/>
|
||||
* The savings of skipping duplicate row detection are significant with long row keys. A
|
||||
* DataBlockEncoder may store a row key once in combination with a count of how many cells are in
|
||||
* the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment
|
||||
* of the counter, and that is for every cell in the row.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Add a Cell to the output stream but repeat the previous row.
|
||||
*/
|
||||
//@Override
|
||||
public void writeWithRepeatRow(Cell cell) {
|
||||
ensurePerCellCapacities();//can we optimize away some of this?
|
||||
|
||||
//save a relatively expensive row comparison, incrementing the row's counter instead
|
||||
rowTokenizer.incrementNumOccurrencesOfLatestValue();
|
||||
addFamilyPart(cell);
|
||||
addQualifierPart(cell);
|
||||
addAfterRowFamilyQualifier(cell);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void write(Cell cell) {
|
||||
ensurePerCellCapacities();
|
||||
|
||||
rowTokenizer.addSorted(CellTool.fillRowRange(cell, rowRange));
|
||||
addFamilyPart(cell);
|
||||
addQualifierPart(cell);
|
||||
addAfterRowFamilyQualifier(cell);
|
||||
}
|
||||
|
||||
|
||||
/***************** internal add methods ************************/
|
||||
|
||||
private void addAfterRowFamilyQualifier(Cell cell){
|
||||
// timestamps
|
||||
timestamps[totalCells] = cell.getTimestamp();
|
||||
timestampEncoder.add(cell.getTimestamp());
|
||||
|
||||
// mvcc versions (historically called memstore timestamps)
|
||||
if (includeMvccVersion) {
|
||||
mvccVersions[totalCells] = cell.getMvccVersion();
|
||||
mvccVersionEncoder.add(cell.getMvccVersion());
|
||||
totalUnencodedBytes += WritableUtils.getVIntSize(cell.getMvccVersion());
|
||||
}else{
|
||||
//must overwrite in case there was a previous version in this array slot
|
||||
mvccVersions[totalCells] = 0L;
|
||||
if(totalCells == 0){//only need to do this for the first cell added
|
||||
mvccVersionEncoder.add(0L);
|
||||
}
|
||||
//totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled
|
||||
}
|
||||
|
||||
// types
|
||||
typeBytes[totalCells] = cell.getTypeByte();
|
||||
cellTypeEncoder.add(cell.getTypeByte());
|
||||
|
||||
// values
|
||||
totalValueBytes += cell.getValueLength();
|
||||
// double the array each time we run out of space
|
||||
values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes);
|
||||
CellTool.copyValueTo(cell, values, valueOffsets[totalCells]);
|
||||
if (cell.getValueLength() > maxValueLength) {
|
||||
maxValueLength = cell.getValueLength();
|
||||
}
|
||||
valueOffsets[totalCells + 1] = totalValueBytes;
|
||||
|
||||
// general
|
||||
totalUnencodedBytes += KeyValueTool.length(cell);
|
||||
++totalCells;
|
||||
}
|
||||
|
||||
private void addFamilyPart(Cell cell) {
|
||||
if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) {
|
||||
CellTool.fillFamilyRange(cell, familyRange);
|
||||
familyDeduplicator.add(familyRange);
|
||||
}
|
||||
}
|
||||
|
||||
private void addQualifierPart(Cell cell) {
|
||||
CellTool.fillQualifierRange(cell, qualifierRange);
|
||||
qualifierDeduplicator.add(qualifierRange);
|
||||
}
|
||||
|
||||
|
||||
/****************** compiling/flushing ********************/
|
||||
|
||||
/**
|
||||
* Expensive method. The second half of the encoding work happens here.
|
||||
*
|
||||
* Take all the separate accumulated data structures and turn them into a single stream of bytes
|
||||
* which is written to the outputStream.
|
||||
*/
|
||||
@Override
|
||||
public void flush() throws IOException {
|
||||
compile();
|
||||
|
||||
// do the actual flushing to the output stream. Order matters.
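// resulting block layout: [block meta][row trie][family section][qualifier section]
// [timestamp deltas][mvccVersion deltas][values]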
|
||||
blockMeta.writeVariableBytesToOutputStream(outputStream);
|
||||
rowWriter.writeBytes(outputStream);
|
||||
familyWriter.writeBytes(outputStream);
|
||||
qualifierWriter.writeBytes(outputStream);
|
||||
timestampEncoder.writeBytes(outputStream);
|
||||
mvccVersionEncoder.writeBytes(outputStream);
|
||||
//CellType bytes are in the row nodes. there is no additional type section
|
||||
outputStream.write(values, 0, totalValueBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Now that all the cells have been added, do the work to reduce them to a series of byte[]
|
||||
* fragments that are ready to be written to the output stream.
|
||||
*/
|
||||
protected void compile(){
|
||||
blockMeta.setNumKeyValueBytes(totalUnencodedBytes);
|
||||
int lastValueOffset = valueOffsets[totalCells];
|
||||
blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
|
||||
blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
|
||||
blockMeta.setNumValueBytes(totalValueBytes);
|
||||
totalBytes += totalValueBytes;
|
||||
|
||||
//these compile methods will add to totalBytes
|
||||
compileTypes();
|
||||
compileMvccVersions();
|
||||
compileTimestamps();
|
||||
compileQualifiers();
|
||||
compileFamilies();
|
||||
compileRows();
|
||||
|
||||
int numMetaBytes = blockMeta.calculateNumMetaBytes();
|
||||
blockMeta.setNumMetaBytes(numMetaBytes);
|
||||
totalBytes += numMetaBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* The following "compile" methods do any intermediate work necessary to transform the cell
|
||||
* fragments collected during the writing phase into structures that are ready to write to the
|
||||
* outputStream.
|
||||
* <p/>
|
||||
* The family and qualifier treatment is almost identical, as is timestamp and mvccVersion.
|
||||
*/
|
||||
|
||||
protected void compileTypes() {
|
||||
blockMeta.setAllSameType(cellTypeEncoder.areAllSameType());
|
||||
if(cellTypeEncoder.areAllSameType()){
|
||||
blockMeta.setAllTypes(cellTypeEncoder.getOnlyType());
|
||||
}
|
||||
}
|
||||
|
||||
protected void compileMvccVersions() {
|
||||
mvccVersionEncoder.compile();
|
||||
blockMeta.setMvccVersionFields(mvccVersionEncoder);
|
||||
int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength();
|
||||
totalBytes += numMvccVersionBytes;
|
||||
}
|
||||
|
||||
protected void compileTimestamps() {
|
||||
timestampEncoder.compile();
|
||||
blockMeta.setTimestampFields(timestampEncoder);
|
||||
int numTimestampBytes = timestampEncoder.getOutputArrayLength();
|
||||
totalBytes += numTimestampBytes;
|
||||
}
|
||||
|
||||
protected void compileQualifiers() {
|
||||
blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
|
||||
qualifierDeduplicator.compile();
|
||||
qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
|
||||
qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false);
|
||||
qualifierWriter.compile();
|
||||
int numQualifierBytes = qualifierWriter.getNumBytes();
|
||||
blockMeta.setNumQualifierBytes(numQualifierBytes);
|
||||
totalBytes += numQualifierBytes;
|
||||
}
|
||||
|
||||
protected void compileFamilies() {
|
||||
blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
|
||||
familyDeduplicator.compile();
|
||||
familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
|
||||
familyWriter.reconstruct(blockMeta, familyTokenizer, true);
|
||||
familyWriter.compile();
|
||||
int numFamilyBytes = familyWriter.getNumBytes();
|
||||
blockMeta.setNumFamilyBytes(numFamilyBytes);
|
||||
totalBytes += numFamilyBytes;
|
||||
}
|
||||
|
||||
protected void compileRows() {
|
||||
rowWriter.reconstruct(this);
|
||||
rowWriter.compile();
|
||||
int numRowBytes = rowWriter.getNumBytes();
|
||||
blockMeta.setNumRowBytes(numRowBytes);
|
||||
blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth());
|
||||
totalBytes += numRowBytes;
|
||||
}
|
||||
|
||||
/********************* convenience getters ********************************/
|
||||
|
||||
public long getValueOffset(int index) {
|
||||
return valueOffsets[index];
|
||||
}
|
||||
|
||||
public int getValueLength(int index) {
|
||||
return (int) (valueOffsets[index + 1] - valueOffsets[index]);
|
||||
}
|
||||
|
||||
/************************* get/set *************************************/
|
||||
|
||||
public PrefixTreeBlockMeta getBlockMeta() {
|
||||
return blockMeta;
|
||||
}
|
||||
|
||||
public Tokenizer getRowTokenizer() {
|
||||
return rowTokenizer;
|
||||
}
|
||||
|
||||
public LongEncoder getTimestampEncoder() {
|
||||
return timestampEncoder;
|
||||
}
|
||||
|
||||
public int getTotalBytes() {
|
||||
return totalBytes;
|
||||
}
|
||||
|
||||
public long[] getTimestamps() {
|
||||
return timestamps;
|
||||
}
|
||||
|
||||
public long[] getMvccVersions() {
|
||||
return mvccVersions;
|
||||
}
|
||||
|
||||
public byte[] getTypeBytes() {
|
||||
return typeBytes;
|
||||
}
|
||||
|
||||
public LongEncoder getMvccVersionEncoder() {
|
||||
return mvccVersionEncoder;
|
||||
}
|
||||
|
||||
public ByteRangeSet getFamilySorter() {
|
||||
return familyDeduplicator;
|
||||
}
|
||||
|
||||
public ByteRangeSet getQualifierSorter() {
|
||||
return qualifierDeduplicator;
|
||||
}
|
||||
|
||||
public ColumnSectionWriter getFamilyWriter() {
|
||||
return familyWriter;
|
||||
}
|
||||
|
||||
public ColumnSectionWriter getQualifierWriter() {
|
||||
return qualifierWriter;
|
||||
}
|
||||
|
||||
public RowSectionWriter getRowWriter() {
|
||||
return rowWriter;
|
||||
}
|
||||
|
||||
public ByteRange getValueByteRange() {
|
||||
return new ByteRange(values, 0, totalValueBytes);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode;
|
||||
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
|
||||
/**
|
||||
* Pool to enable reusing the Encoder objects which can consist of thousands of smaller objects and
|
||||
* would be more garbage than the data in the block. A new encoder is needed for each block in
|
||||
* a flush, compaction, RPC response, etc.
|
||||
*
|
||||
* It is not a pool in the traditional sense, but implements the semantics of a traditional pool
|
||||
* via ThreadLocals to avoid sharing between threads. Sharing between threads would not be
|
||||
* very expensive given that it's accessed per-block, but this is just as easy.
|
||||
*
|
||||
* This pool implementation assumes there is a one-to-one mapping between a single thread and a
|
||||
* single flush or compaction.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ThreadLocalEncoderPool implements EncoderPool{
|
||||
|
||||
private static final ThreadLocal<PrefixTreeEncoder> ENCODER
|
||||
= new ThreadLocal<PrefixTreeEncoder>();
|
||||
|
||||
/**
|
||||
* Get the encoder attached to the current ThreadLocal, or create a new one and attach it to the
|
||||
* current thread.
|
||||
*/
|
||||
@Override
|
||||
public PrefixTreeEncoder checkOut(OutputStream os, boolean includeMvccVersion) {
|
||||
PrefixTreeEncoder builder = ENCODER.get();
|
||||
builder = EncoderFactory.prepareEncoder(builder, os, includeMvccVersion);
|
||||
ENCODER.set(builder);
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIn(PrefixTreeEncoder encoder) {
|
||||
// attached to thread on checkOut, so shouldn't need to do anything here
|
||||
|
||||
// do we need to worry about detaching encoders from compaction threads or are the same threads
|
||||
// used over and over
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.column;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.ByteRange;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Strings;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
import org.apache.hbase.util.vint.UVIntTool;
|
||||
|
||||
/**
|
||||
* Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly.
|
||||
* The family and qualifier sections of the data block are made of 1 or more of these nodes.
|
||||
* <p/>
|
||||
* Each node is composed of 3 sections:<br/>
|
||||
* <li>tokenLength: UVInt (normally 1 byte) indicating the number of token bytes
|
||||
* <li>token[]: the actual token bytes
|
||||
* <li>parentStartPosition: the offset of this node's parent from the start of the family or qualifier
|
||||
* section
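* <p/>
* For illustration only (hypothetical values): a node holding the 3-byte token "abc" whose
* parent starts at offset 0, with a 1-byte parent offset width, would serialize as
* [0x03]['a']['b']['c'][0x00].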
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ColumnNodeWriter{
|
||||
|
||||
/************* fields ****************************/
|
||||
|
||||
protected TokenizerNode builderNode;
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
|
||||
protected boolean familyVsQualifier;
|
||||
|
||||
protected int tokenLength;
|
||||
protected byte[] token;
|
||||
protected int parentStartPosition;
|
||||
|
||||
|
||||
/*************** construct **************************/
|
||||
|
||||
public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
|
||||
boolean familyVsQualifier) {
|
||||
this.blockMeta = blockMeta;
|
||||
this.builderNode = builderNode;
|
||||
this.familyVsQualifier = familyVsQualifier;
|
||||
calculateTokenLength();
|
||||
}
|
||||
|
||||
|
||||
/************* methods *******************************/
|
||||
|
||||
public boolean isRoot() {
|
||||
return parentStartPosition == 0;
|
||||
}
|
||||
|
||||
private void calculateTokenLength() {
|
||||
tokenLength = builderNode.getTokenLength();
|
||||
token = new byte[tokenLength];
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a
|
||||
* placeholder.
|
||||
* @param offsetWidthPlaceholder the placeholder
|
||||
* @return node width
|
||||
*/
|
||||
public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) {
|
||||
int width = 0;
|
||||
width += UVIntTool.numBytes(tokenLength);
|
||||
width += token.length;
|
||||
width += offsetWidthPlaceholder;
|
||||
return width;
|
||||
}
|
||||
|
||||
public void writeBytes(OutputStream os) throws IOException {
|
||||
int parentOffsetWidth;
|
||||
if (familyVsQualifier) {
|
||||
parentOffsetWidth = blockMeta.getFamilyOffsetWidth();
|
||||
} else {
|
||||
parentOffsetWidth = blockMeta.getQualifierOffsetWidth();
|
||||
}
|
||||
UVIntTool.writeBytes(tokenLength, os);
|
||||
os.write(token);
|
||||
UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os);
|
||||
}
|
||||
|
||||
public void setTokenBytes(ByteRange source) {
|
||||
source.deepCopySubRangeTo(0, tokenLength, token, 0);
|
||||
}
|
||||
|
||||
|
||||
/****************** standard methods ************************/
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ",");
|
||||
sb.append("[");
|
||||
sb.append(Bytes.toString(token));
|
||||
sb.append("]->");
|
||||
sb.append(parentStartPosition);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
/************************** get/set ***********************/
|
||||
|
||||
public void setParentStartPosition(int parentStartPosition) {
|
||||
this.parentStartPosition = parentStartPosition;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.column;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.CollectionUtils;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
|
||||
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family
|
||||
* section is written after the row section, and the qualifier section after the family section.
|
||||
* <p/>
|
||||
* The family and qualifier tries, or "column tries", are structured differently than the row trie.
|
||||
* The trie cannot be reassembled without external data about the offsets of the leaf nodes, and
|
||||
* these external pointers are stored in the nubs and leaves of the row trie. For each cell in a
|
||||
* row, the row trie contains a list of offsets into the column sections (along with pointers to
|
||||
* timestamps and other per-cell fields). These offsets point to the last column node/token that
|
||||
* comprises the column name. To assemble the column name, the trie is traversed in reverse (right
|
||||
* to left), with the rightmost tokens pointing to the start of their "parent" node which is the
|
||||
* node to the left.
|
||||
* <p/>
|
||||
* This choice was made to reduce the size of the column trie by storing the minimum amount of
|
||||
* offset data. As a result, to find a specific qualifier within a row, you must do a binary search
|
||||
* of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might
|
||||
* encode the columns in both a forward and reverse trie, which would convert binary searches into
|
||||
* more efficient trie searches, which would benefit wide rows in particular.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ColumnSectionWriter {
|
||||
|
||||
public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
|
||||
|
||||
/****************** fields ****************************/
|
||||
|
||||
private PrefixTreeBlockMeta blockMeta;
|
||||
|
||||
private boolean familyVsQualifier;
|
||||
private Tokenizer tokenizer;
|
||||
private int numBytes = 0;
|
||||
private ArrayList<TokenizerNode> nonLeaves;
|
||||
private ArrayList<TokenizerNode> leaves;
|
||||
private ArrayList<TokenizerNode> allNodes;
|
||||
private ArrayList<ColumnNodeWriter> columnNodeWriters;
|
||||
private List<Integer> outputArrayOffsets;
|
||||
|
||||
|
||||
/*********************** construct *********************/
|
||||
|
||||
public ColumnSectionWriter() {
|
||||
this.nonLeaves = Lists.newArrayList();
|
||||
this.leaves = Lists.newArrayList();
|
||||
this.outputArrayOffsets = Lists.newArrayList();
|
||||
}
|
||||
|
||||
public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
|
||||
boolean familyVsQualifier) {
|
||||
this();// init collections
|
||||
reconstruct(blockMeta, builder, familyVsQualifier);
|
||||
}
|
||||
|
||||
public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
|
||||
boolean familyVsQualifier) {
|
||||
this.blockMeta = blockMeta;
|
||||
this.tokenizer = builder;
|
||||
this.familyVsQualifier = familyVsQualifier;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
numBytes = 0;
|
||||
nonLeaves.clear();
|
||||
leaves.clear();
|
||||
outputArrayOffsets.clear();
|
||||
}
|
||||
|
||||
|
||||
/****************** methods *******************************/
|
||||
|
||||
public ColumnSectionWriter compile() {
|
||||
if (familyVsQualifier) {
|
||||
// do nothing. max family length fixed at Byte.MAX_VALUE
|
||||
} else {
|
||||
blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
|
||||
}
|
||||
|
||||
tokenizer.setNodeFirstInsertionIndexes();
|
||||
|
||||
tokenizer.appendNodes(nonLeaves, true, false);
|
||||
|
||||
tokenizer.appendNodes(leaves, false, true);
|
||||
|
||||
allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
|
||||
allNodes.addAll(nonLeaves);
|
||||
allNodes.addAll(leaves);
|
||||
|
||||
columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
|
||||
for (int i = 0; i < allNodes.size(); ++i) {
|
||||
TokenizerNode node = allNodes.get(i);
|
||||
columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier));
|
||||
}
|
||||
|
||||
// leaf widths are known at this point, so add them up
|
||||
int totalBytesWithoutOffsets = 0;
|
||||
for (int i = allNodes.size() - 1; i >= 0; --i) {
|
||||
ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
|
||||
// leaves store all but their first token byte
|
||||
totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
|
||||
}
|
||||
|
||||
// figure out how wide our offset FInts are
|
||||
int parentOffsetWidth = 0;
|
||||
while (true) {
|
||||
++parentOffsetWidth;
|
||||
int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
|
||||
if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
|
||||
numBytes = numBytesFinder;
|
||||
break;
|
||||
}// it fits
|
||||
}
|
||||
if (familyVsQualifier) {
|
||||
blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
|
||||
} else {
|
||||
blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
|
||||
}
|
||||
|
||||
int forwardIndex = 0;
|
||||
for (int i = 0; i < allNodes.size(); ++i) {
|
||||
TokenizerNode node = allNodes.get(i);
|
||||
ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
|
||||
int fullNodeWidth = columnNodeWriter
|
||||
.getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
|
||||
node.setOutputArrayOffset(forwardIndex);
|
||||
columnNodeWriter.setTokenBytes(node.getToken());
|
||||
if (node.isRoot()) {
|
||||
columnNodeWriter.setParentStartPosition(0);
|
||||
} else {
|
||||
columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
|
||||
}
|
||||
forwardIndex += fullNodeWidth;
|
||||
}
|
||||
|
||||
tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
|
||||
|
||||
return this;
|
||||
}
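The offset-width search in compile() above is circular: the section size depends on the offset width, and the offset width must be large enough to address the section. A hedged, stand-alone sketch of that loop, with a toy replacement for UFIntTool.maxValueForNumBytes():

public class OffsetWidthSketch {

  /** Largest unsigned value representable in the given number of bytes (toy stand-in). */
  static long maxValueForNumBytes(int numBytes) {
    return (1L << (8 * numBytes)) - 1;
  }

  /** Find the smallest fixed offset width such that every offset into the section fits. */
  static int chooseOffsetWidth(int totalBytesWithoutOffsets, int numOffsets) {
    int width = 0;
    while (true) {
      ++width;
      long totalBytes = totalBytesWithoutOffsets + (long) numOffsets * width;
      if (totalBytes < maxValueForNumBytes(width)) {
        return width;   // the whole section is addressable with `width`-byte offsets
      }
    }
  }

  public static void main(String[] args) {
    // 300 bytes of node data plus 50 offsets: 1 byte is too small (350 > 255),
    // but 2 bytes works (400 < 65535).
    System.out.println(chooseOffsetWidth(300, 50));   // prints 2
  }
}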
|
||||
|
||||
public void writeBytes(OutputStream os) throws IOException {
|
||||
for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
|
||||
columnNodeWriter.writeBytes(os);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/************* get/set **************************/
|
||||
|
||||
public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
|
||||
return columnNodeWriters;
|
||||
}
|
||||
|
||||
public int getNumBytes() {
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
public int getOutputArrayOffset(int sortedIndex) {
|
||||
return outputArrayOffsets.get(sortedIndex);
|
||||
}
|
||||
|
||||
public ArrayList<TokenizerNode> getNonLeaves() {
|
||||
return nonLeaves;
|
||||
}
|
||||
|
||||
public ArrayList<TokenizerNode> getLeaves() {
|
||||
return leaves;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.other;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each
|
||||
* KV. If allSameType is still true during conversion to byte[], then we can store the "onlyType" in blockMeta,
|
||||
* therefore not repeating it for each cell and saving 1 byte per cell.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class CellTypeEncoder {
|
||||
|
||||
/************* fields *********************/
|
||||
|
||||
protected boolean pendingFirstType = true;
|
||||
protected boolean allSameType = true;
|
||||
protected byte onlyType;
|
||||
|
||||
|
||||
/************* construct *********************/
|
||||
|
||||
public void reset() {
|
||||
pendingFirstType = true;
|
||||
allSameType = true;
|
||||
}
|
||||
|
||||
|
||||
/************* methods *************************/
|
||||
|
||||
public void add(byte type) {
|
||||
if (pendingFirstType) {
|
||||
onlyType = type;
|
||||
pendingFirstType = false;
|
||||
} else if (onlyType != type) {
|
||||
allSameType = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**************** get/set **************************/
|
||||
|
||||
public boolean areAllSameType() {
|
||||
return allSameType;
|
||||
}
|
||||
|
||||
public byte getOnlyType() {
|
||||
return onlyType;
|
||||
}
|
||||
|
||||
}
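A short usage sketch for the class above (illustrative only; the type code is a placeholder standing in for a KeyValue.Type code):

import org.apache.hbase.codec.prefixtree.encode.other.CellTypeEncoder;

public class CellTypeEncoderExample {
  public static void main(String[] args) {
    CellTypeEncoder encoder = new CellTypeEncoder();
    byte putCode = 4;                      // illustrative stand-in for KeyValue.Type.Put's code
    encoder.add(putCode);
    encoder.add(putCode);
    encoder.add(putCode);
    if (encoder.areAllSameType()) {
      // store getOnlyType() once in blockMeta instead of once per cell
      System.out.println("all cells share type " + encoder.getOnlyType());
    }
  }
}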
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.other;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.ArrayUtils;
|
||||
import org.apache.hadoop.hbase.util.CollectionUtils;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
|
||||
/**
|
||||
* Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
|
||||
* collection of Cells.
|
||||
*
|
||||
* 1. add longs to a HashSet for fast de-duplication
|
||||
* 2. keep track of the min and max
|
||||
* 3. copy all values to a new long[]
|
||||
* 4. Arrays.sort the long[]
|
||||
* 5. calculate maxDelta = max - min
|
||||
* 6. determine FInt width based on maxDelta
|
||||
* 7. PrefixTreeEncoder binary searches to find index of each value
|
||||
*/
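A hedged, self-contained walk-through of steps 1-7 above using only JDK classes; the byte-width helper is a stand-in for UFIntTool.numBytes(), not the real implementation, and the timestamps are made up.

import java.util.Arrays;
import java.util.HashSet;

public class TimestampEncodingSketch {

  /** Toy stand-in for UFIntTool.numBytes(): bytes needed to hold an unsigned value. */
  static int numBytes(long value) {
    int bytes = 1;
    while (value > ((1L << (8 * bytes)) - 1)) {
      ++bytes;
    }
    return bytes;
  }

  public static void main(String[] args) {
    long[] cellTimestamps = {1360000000000L, 1360000000000L, 1360000005000L, 1360000009000L};

    HashSet<Long> unique = new HashSet<Long>();      // 1. de-duplicate
    for (long ts : cellTimestamps) {
      unique.add(ts);
    }
    long[] sorted = new long[unique.size()];         // 3. copy to a long[]
    int i = 0;
    for (long ts : unique) {
      sorted[i++] = ts;
    }
    Arrays.sort(sorted);                             // 4. sort
    long min = sorted[0];                            // 2./5. min, max, maxDelta
    long maxDelta = sorted[sorted.length - 1] - min;
    int bytesPerDelta = maxDelta == 0 ? 0 : numBytes(maxDelta);   // 6. FInt width for deltas
    int bytesPerIndex = numBytes(sorted.length - 1);              //    FInt width for indexes

    // 7. each cell then stores only a small index into the sorted deltas
    int indexForCell2 = Arrays.binarySearch(sorted, cellTimestamps[2]);

    System.out.println("unique=" + sorted.length + " bytesPerDelta=" + bytesPerDelta
        + " bytesPerIndex=" + bytesPerIndex + " cell2Index=" + indexForCell2);
    // prints: unique=3 bytesPerDelta=2 bytesPerIndex=1 cell2Index=1
  }
}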
|
||||
@InterfaceAudience.Private
|
||||
public class LongEncoder {
|
||||
|
||||
/****************** fields ****************************/
|
||||
|
||||
protected HashSet<Long> uniqueValues;
|
||||
protected long[] sortedUniqueValues;
|
||||
protected long min, max, maxDelta;
|
||||
|
||||
protected int bytesPerDelta;
|
||||
protected int bytesPerIndex;
|
||||
protected int totalCompressedBytes;
|
||||
|
||||
|
||||
/****************** construct ****************************/
|
||||
|
||||
public LongEncoder() {
|
||||
this.uniqueValues = new HashSet<Long>();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
uniqueValues.clear();
|
||||
sortedUniqueValues = null;
|
||||
min = Long.MAX_VALUE;
|
||||
max = Long.MIN_VALUE;
|
||||
maxDelta = Long.MIN_VALUE;
|
||||
bytesPerIndex = 0;
|
||||
bytesPerDelta = 0;
|
||||
totalCompressedBytes = 0;
|
||||
}
|
||||
|
||||
|
||||
/************* methods ***************************/
|
||||
|
||||
public void add(long timestamp) {
|
||||
uniqueValues.add(timestamp);
|
||||
}
|
||||
|
||||
public LongEncoder compile() {
|
||||
int numUnique = uniqueValues.size();
|
||||
if (numUnique == 1) {
|
||||
min = CollectionUtils.getFirst(uniqueValues);
|
||||
sortedUniqueValues = new long[] { min };
|
||||
return this;
|
||||
}
|
||||
|
||||
sortedUniqueValues = new long[numUnique];
|
||||
int lastIndex = -1;
|
||||
for (long value : uniqueValues) {
|
||||
sortedUniqueValues[++lastIndex] = value;
|
||||
}
|
||||
Arrays.sort(sortedUniqueValues);
|
||||
min = ArrayUtils.getFirst(sortedUniqueValues);
|
||||
max = ArrayUtils.getLast(sortedUniqueValues);
|
||||
maxDelta = max - min;
|
||||
if (maxDelta > 0) {
|
||||
bytesPerDelta = UFIntTool.numBytes(maxDelta);
|
||||
} else {
|
||||
bytesPerDelta = 0;
|
||||
}
|
||||
|
||||
int maxIndex = numUnique - 1;
|
||||
bytesPerIndex = UFIntTool.numBytes(maxIndex);
|
||||
|
||||
totalCompressedBytes = numUnique * bytesPerDelta;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public long getDelta(int index) {
|
||||
if (sortedUniqueValues.length == 0) {
|
||||
return 0;
|
||||
}
|
||||
return sortedUniqueValues[index] - min;
|
||||
}
|
||||
|
||||
public int getIndex(long value) {
|
||||
// should always find an exact match
|
||||
return Arrays.binarySearch(sortedUniqueValues, value);
|
||||
}
|
||||
|
||||
public void writeBytes(OutputStream os) throws IOException {
|
||||
for (int i = 0; i < sortedUniqueValues.length; ++i) {
|
||||
long delta = sortedUniqueValues[i] - min;
|
||||
UFIntTool.writeBytes(bytesPerDelta, delta, os);
|
||||
}
|
||||
}
|
||||
|
||||
//convenience method for tests
|
||||
public byte[] getByteArray() throws IOException{
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
writeBytes(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
public int getOutputArrayLength() {
|
||||
return sortedUniqueValues.length * bytesPerDelta;
|
||||
}
|
||||
|
||||
public int getNumUniqueValues() {
|
||||
return sortedUniqueValues.length;
|
||||
}
|
||||
|
||||
|
||||
/******************* Object methods **********************/
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (ArrayUtils.isEmpty(sortedUniqueValues)) {
|
||||
return "[]";
|
||||
}
|
||||
return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
|
||||
}
|
||||
|
||||
|
||||
/******************** get/set **************************/
|
||||
|
||||
public long getMin() {
|
||||
return min;
|
||||
}
|
||||
|
||||
public int getBytesPerDelta() {
|
||||
return bytesPerDelta;
|
||||
}
|
||||
|
||||
public int getBytesPerIndex() {
|
||||
return bytesPerIndex;
|
||||
}
|
||||
|
||||
public int getTotalCompressedBytes() {
|
||||
return totalCompressedBytes;
|
||||
}
|
||||
|
||||
public long[] getSortedUniqueTimestamps() {
|
||||
return sortedUniqueValues;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,285 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.row;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.ByteRangeTool;
|
||||
import org.apache.hadoop.hbase.util.CollectionUtils;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
|
||||
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
import org.apache.hbase.util.vint.UVIntTool;
|
||||
|
||||
/**
|
||||
* Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
|
||||
* Please see the write() method for the order in which data is written.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class RowNodeWriter{
|
||||
protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);
|
||||
|
||||
/********************* fields ******************************/
|
||||
|
||||
protected PrefixTreeEncoder prefixTreeEncoder;
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
protected TokenizerNode tokenizerNode;
|
||||
|
||||
protected int tokenWidth;
|
||||
protected int fanOut;
|
||||
protected int numCells;
|
||||
|
||||
protected int width;
|
||||
|
||||
|
||||
/*********************** construct *************************/
|
||||
|
||||
public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
|
||||
reconstruct(keyValueBuilder, tokenizerNode);
|
||||
}
|
||||
|
||||
public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
|
||||
this.prefixTreeEncoder = prefixTreeEncoder;
|
||||
reset(tokenizerNode);
|
||||
}
|
||||
|
||||
public void reset(TokenizerNode node) {
|
||||
this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks
|
||||
this.tokenizerNode = node;
|
||||
this.tokenWidth = 0;
|
||||
this.fanOut = 0;
|
||||
this.numCells = 0;
|
||||
this.width = 0;
|
||||
calculateOffsetsAndLengths();
|
||||
}
|
||||
|
||||
|
||||
/********************* methods ****************************/
|
||||
|
||||
protected void calculateOffsetsAndLengths(){
|
||||
tokenWidth = tokenizerNode.getTokenLength();
|
||||
if(!tokenizerNode.isRoot()){
|
||||
--tokenWidth;//root has no parent
|
||||
}
|
||||
fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
|
||||
numCells = tokenizerNode.getNumOccurrences();
|
||||
}
|
||||
|
||||
public int calculateWidth(){
|
||||
calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
|
||||
return width;
|
||||
}
|
||||
|
||||
public int calculateWidthOverrideOffsetWidth(int offsetWidth){
|
||||
width = 0;
|
||||
width += UVIntTool.numBytes(tokenWidth);
|
||||
width += tokenWidth;
|
||||
|
||||
width += UVIntTool.numBytes(fanOut);
|
||||
width += fanOut;
|
||||
|
||||
width += UVIntTool.numBytes(numCells);
|
||||
|
||||
if(tokenizerNode.hasOccurrences()){
|
||||
int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
|
||||
+ blockMeta.getQualifierOffsetWidth()
|
||||
+ blockMeta.getTimestampIndexWidth()
|
||||
+ blockMeta.getMvccVersionIndexWidth()
|
||||
+ blockMeta.getKeyValueTypeWidth()
|
||||
+ blockMeta.getValueOffsetWidth()
|
||||
+ blockMeta.getValueLengthWidth();
|
||||
width += numCells * fixedBytesPerCell;
|
||||
}
|
||||
|
||||
if( ! tokenizerNode.isLeaf()){
|
||||
width += fanOut * offsetWidth;
|
||||
}
|
||||
|
||||
return width;
|
||||
}
|
||||
|
||||
|
||||
/*********************** writing the compiled structure to the OutputStream ***************/
|
||||
|
||||
public void write(OutputStream os) throws IOException{
|
||||
//info about this row trie node
|
||||
writeRowToken(os);
|
||||
writeFan(os);
|
||||
writeNumCells(os);
|
||||
|
||||
//UFInt indexes and offsets for each cell in the row (if nub or leaf)
|
||||
writeFamilyNodeOffsets(os);
|
||||
writeQualifierNodeOffsets(os);
|
||||
writeTimestampIndexes(os);
|
||||
writeMvccVersionIndexes(os);
|
||||
writeCellTypes(os);
|
||||
writeValueOffsets(os);
|
||||
writeValueLengths(os);
|
||||
|
||||
//offsets to the children of this row trie node (if branch or nub)
|
||||
writeNextRowTrieNodeOffsets(os);
|
||||
}
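A hedged sketch of the header layout produced by write() above for a single row node: token, then fan bytes, then numCells. The varint below is a generic 7-bits-per-byte encoding chosen for the example, not necessarily UVIntTool's exact wire format, and the token and fan values are made up.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class RowNodeLayoutSketch {

  /** Generic little-endian base-128 varint, standing in for UVIntTool.writeBytes(). */
  static void writeVarInt(OutputStream os, int value) throws IOException {
    while ((value & ~0x7F) != 0) {
      os.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    os.write(value);
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();

    byte[] token = "row".getBytes("UTF-8");   // token bytes owned by this trie node
    byte[] fan = {'A', 'B'};                  // first byte of each child's token
    int numCells = 0;                         // 0 here, i.e. a pure branch node

    writeVarInt(os, token.length);            // writeRowToken(): UVInt tokenWidth + token
    os.write(token);
    writeVarInt(os, fan.length);              // writeFan(): UVInt fanOut + one byte per child
    os.write(fan);
    writeVarInt(os, numCells);                // writeNumCells(): UVInt numCells

    System.out.println("row node header is " + os.size() + " bytes");   // prints 8
  }
}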
|
||||
|
||||
|
||||
/**
|
||||
* Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
|
||||
* fields can reproduce all the row keys that compose the block.
|
||||
*/
|
||||
|
||||
/**
|
||||
* UVInt: tokenWidth
|
||||
* bytes: token
|
||||
*/
|
||||
protected void writeRowToken(OutputStream os) throws IOException {
|
||||
UVIntTool.writeBytes(tokenWidth, os);
|
||||
int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
|
||||
ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* UVInt: numFanBytes/fanOut
|
||||
* bytes: each fan byte
|
||||
*/
|
||||
public void writeFan(OutputStream os) throws IOException {
|
||||
UVIntTool.writeBytes(fanOut, os);
|
||||
if (fanOut <= 0) {
|
||||
return;
|
||||
}
|
||||
ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
os.write(child.getToken().get(0));// first byte of each child's token
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* UVInt: numCells, the number of cells in this row, which will be 0 for branch nodes
|
||||
*/
|
||||
protected void writeNumCells(OutputStream os) throws IOException {
|
||||
UVIntTool.writeBytes(numCells, os);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The following methods write data for each cell in the row, mostly consisting of indexes or
|
||||
* offsets into the timestamp/column data structures that are written in the middle of the block.
|
||||
* We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
|
||||
* search of a particular column/timestamp combination.
|
||||
* <p/>
|
||||
* Branch nodes will not have any data in these sections.
|
||||
*/
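A small sketch of why these per-cell fields are written with fixed (UFInt) widths: every cell contributes exactly the same number of bytes, so the decoder can seek directly to cell i and binary search cells without decoding the ones before it. The widths and starting position below are hypothetical.

public class FixedWidthRandomAccessSketch {

  public static void main(String[] args) {
    int familyOffsetWidth = 1;       // in the real codec these widths come from blockMeta
    int qualifierOffsetWidth = 2;
    int timestampIndexWidth = 1;
    int valueOffsetWidth = 2;
    int valueLengthWidth = 2;

    int fixedBytesPerCell = familyOffsetWidth + qualifierOffsetWidth
        + timestampIndexWidth + valueOffsetWidth + valueLengthWidth;

    int perCellSectionStart = 37;    // hypothetical position inside the row node
    int cellIndex = 5;

    // constant-time seek to cell 5's qualifier-offset field
    int qualifierFieldPosition = perCellSectionStart
        + cellIndex * fixedBytesPerCell   // skip whole cells 0..4
        + familyOffsetWidth;              // skip the family field of cell 5

    System.out.println("cell " + cellIndex + " qualifier offset lives at byte "
        + qualifierFieldPosition);        // 37 + 5*8 + 1 = 78
  }
}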
|
||||
|
||||
protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
|
||||
if (blockMeta.getFamilyOffsetWidth() <= 0) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < numCells; ++i) {
|
||||
int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
|
||||
.getFirstInsertionIndex() + i : 0;
|
||||
int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
|
||||
cellInsertionIndex);
|
||||
int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
|
||||
sortedIndex);
|
||||
UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
|
||||
if (blockMeta.getQualifierOffsetWidth() <= 0) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < numCells; ++i) {
|
||||
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
|
||||
int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
|
||||
cellInsertionIndex);
|
||||
int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
|
||||
sortedIndex);
|
||||
UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeTimestampIndexes(OutputStream os) throws IOException {
|
||||
if (blockMeta.getTimestampIndexWidth() <= 0) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < numCells; ++i) {
|
||||
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
|
||||
long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
|
||||
int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
|
||||
UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
|
||||
if (blockMeta.getMvccVersionIndexWidth() <= 0) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < numCells; ++i) {
|
||||
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
|
||||
long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
|
||||
int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
|
||||
UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeCellTypes(OutputStream os) throws IOException {
|
||||
if (blockMeta.isAllSameType()) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < numCells; ++i) {
|
||||
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
|
||||
os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeValueOffsets(OutputStream os) throws IOException {
|
||||
for (int i = 0; i < numCells; ++i) {
|
||||
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
|
||||
long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
|
||||
UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeValueLengths(OutputStream os) throws IOException {
|
||||
for (int i = 0; i < numCells; ++i) {
|
||||
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
|
||||
int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
|
||||
UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
|
||||
*/
|
||||
protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
|
||||
ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
|
||||
UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,219 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.row;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
|
||||
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
|
||||
import org.apache.hbase.util.vint.UFIntTool;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
|
||||
* key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
|
||||
* also contains references to offsets in the other sections of the data block that enable the
|
||||
* decoder to match a row key with its qualifier, timestamp, type, value, etc.
|
||||
* <p>
|
||||
* The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
|
||||
* internals of each row node.
|
||||
*/
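A hedged sketch of the negativeIndex bookkeeping used by compile() below and consumed by RowNodeWriter.writeNextRowTrieNodeOffsets(): node positions are tracked as distances back from the end of the row section, and a parent stores each child pointer as the difference of those distances. The numbers are made up for illustration.

public class NegativeIndexSketch {

  public static void main(String[] args) {
    int rowSectionLength = 55;

    int parentNegativeIndex = 30;    // parent starts 30 bytes before the section end
    int childNegativeIndex = 9;      // child starts 9 bytes before the section end

    int parentStart = rowSectionLength - parentNegativeIndex;   // 25
    int childStart = rowSectionLength - childNegativeIndex;     // 46

    // the value actually written per child by writeNextRowTrieNodeOffsets()
    int distanceToChild = parentNegativeIndex - childNegativeIndex;   // 21 = childStart - parentStart

    System.out.println("parent@" + parentStart + " child@" + childStart
        + " storedOffset=" + distanceToChild);
  }
}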
|
||||
@InterfaceAudience.Private
|
||||
public class RowSectionWriter {
|
||||
|
||||
/***************** fields **************************/
|
||||
|
||||
protected PrefixTreeEncoder prefixTreeEncoder;
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
|
||||
protected int numBytes;
|
||||
|
||||
protected ArrayList<TokenizerNode> nonLeaves;
|
||||
protected ArrayList<TokenizerNode> leaves;
|
||||
|
||||
protected ArrayList<RowNodeWriter> leafWriters;
|
||||
protected ArrayList<RowNodeWriter> nonLeafWriters;
|
||||
|
||||
protected int numLeafWriters;
|
||||
protected int numNonLeafWriters;
|
||||
|
||||
|
||||
/********************* construct **********************/
|
||||
|
||||
public RowSectionWriter() {
|
||||
this.nonLeaves = Lists.newArrayList();
|
||||
this.leaves = Lists.newArrayList();
|
||||
this.leafWriters = Lists.newArrayList();
|
||||
this.nonLeafWriters = Lists.newArrayList();
|
||||
}
|
||||
|
||||
public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
|
||||
reconstruct(prefixTreeEncoder);
|
||||
}
|
||||
|
||||
public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
|
||||
this.prefixTreeEncoder = prefixTreeEncoder;
|
||||
this.blockMeta = prefixTreeEncoder.getBlockMeta();
|
||||
reset();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
numBytes = 0;
|
||||
nonLeaves.clear();
|
||||
leaves.clear();
|
||||
numLeafWriters = 0;
|
||||
numNonLeafWriters = 0;
|
||||
}
|
||||
|
||||
|
||||
/****************** methods *******************************/
|
||||
|
||||
public RowSectionWriter compile() {
|
||||
blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
|
||||
prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
|
||||
|
||||
prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
|
||||
prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
|
||||
|
||||
// track the starting position of each node in final output
|
||||
int negativeIndex = 0;
|
||||
|
||||
// create leaf writer nodes
|
||||
// leaf widths are known at this point, so add them up
|
||||
int totalLeafBytes = 0;
|
||||
for (int i = leaves.size() - 1; i >= 0; --i) {
|
||||
TokenizerNode leaf = leaves.get(i);
|
||||
RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
|
||||
++numLeafWriters;
|
||||
// leaves store all but their first token byte
|
||||
int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
|
||||
totalLeafBytes += leafNodeWidth;
|
||||
negativeIndex += leafNodeWidth;
|
||||
leaf.setNegativeIndex(negativeIndex);
|
||||
}
|
||||
|
||||
int totalNonLeafBytesWithoutOffsets = 0;
|
||||
int totalChildPointers = 0;
|
||||
for (int i = nonLeaves.size() - 1; i >= 0; --i) {
|
||||
TokenizerNode nonLeaf = nonLeaves.get(i);
|
||||
RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
|
||||
++numNonLeafWriters;
|
||||
totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
|
||||
totalChildPointers += nonLeaf.getNumChildren();
|
||||
}
|
||||
|
||||
// figure out how wide our offset FInts are
|
||||
int offsetWidth = 0;
|
||||
while (true) {
|
||||
++offsetWidth;
|
||||
int offsetBytes = totalChildPointers * offsetWidth;
|
||||
int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
|
||||
if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
|
||||
// it fits
|
||||
numBytes = totalRowBytes;
|
||||
break;
|
||||
}
|
||||
}
|
||||
blockMeta.setNextNodeOffsetWidth(offsetWidth);
|
||||
|
||||
// populate negativeIndexes
|
||||
for (int i = nonLeaves.size() - 1; i >= 0; --i) {
|
||||
TokenizerNode nonLeaf = nonLeaves.get(i);
|
||||
int writerIndex = nonLeaves.size() - i - 1;
|
||||
RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
|
||||
int nodeWidth = nonLeafWriter.calculateWidth();
|
||||
negativeIndex += nodeWidth;
|
||||
nonLeaf.setNegativeIndex(negativeIndex);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
|
||||
TokenizerNode builderNode) {
|
||||
RowNodeWriter rowNodeWriter = null;
|
||||
//check if there is an existing node we can recycle
|
||||
if (index >= list.size()) {
|
||||
//there are not enough existing nodes, so add a new one which will be retrieved below
|
||||
list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
|
||||
}
|
||||
rowNodeWriter = list.get(index);
|
||||
rowNodeWriter.reset(builderNode);
|
||||
return rowNodeWriter;
|
||||
}
|
||||
|
||||
|
||||
public void writeBytes(OutputStream os) throws IOException {
|
||||
for (int i = numNonLeafWriters - 1; i >= 0; --i) {
|
||||
RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
|
||||
nonLeafWriter.write(os);
|
||||
}
|
||||
// duplicates above... written more for clarity right now
|
||||
for (int i = numLeafWriters - 1; i >= 0; --i) {
|
||||
RowNodeWriter leafWriter = leafWriters.get(i);
|
||||
leafWriter.write(os);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/***************** static ******************************/
|
||||
|
||||
protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
|
||||
ArrayList<TokenizerNode> ins, boolean leaves) {
|
||||
ArrayList<TokenizerNode> outs = Lists.newArrayList();
|
||||
for (int i = ins.size() - 1; i >= 0; --i) {
|
||||
TokenizerNode n = ins.get(i);
|
||||
if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) {
|
||||
outs.add(ins.get(i));
|
||||
}
|
||||
}
|
||||
return outs;
|
||||
}
|
||||
|
||||
|
||||
/************* get/set **************************/
|
||||
|
||||
public int getNumBytes() {
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
public ArrayList<TokenizerNode> getNonLeaves() {
|
||||
return nonLeaves;
|
||||
}
|
||||
|
||||
public ArrayList<TokenizerNode> getLeaves() {
|
||||
return leaves;
|
||||
}
|
||||
|
||||
public ArrayList<RowNodeWriter> getNonLeafWriters() {
|
||||
return nonLeafWriters;
|
||||
}
|
||||
|
||||
public ArrayList<RowNodeWriter> getLeafWriters() {
|
||||
return leafWriters;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.tokenize;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Determines the order of nodes in the output array. It may be possible to optimize this further.
|
||||
*/
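A self-contained illustration of the ordering rules below, applied to a toy node type so the resulting order is easy to see (illustrative only, not part of the patch):

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public class TokenOrderingSketch {

  static class Node {
    final long id; final int tokenOffset; final boolean leaf;
    Node(long id, int tokenOffset, boolean leaf) {
      this.id = id; this.tokenOffset = tokenOffset; this.leaf = leaf;
    }
    @Override
    public String toString() {
      return (leaf ? "leaf" : "branch") + id + "@" + tokenOffset;
    }
  }

  public static void main(String[] args) {
    List<Node> nodes = new ArrayList<Node>();
    nodes.add(new Node(1, 2, true));    // a leaf
    nodes.add(new Node(2, 0, false));   // a shallow branch
    nodes.add(new Node(3, 2, false));   // a deeper branch
    nodes.add(new Node(4, 1, true));    // another leaf

    Collections.sort(nodes, new Comparator<Node>() {
      @Override
      public int compare(Node a, Node b) {
        if (!a.leaf && b.leaf) { return -1; }                   // leaves always go last
        if (a.leaf && !b.leaf) { return 1; }
        if (a.leaf && b.leaf) { return a.id < b.id ? -1 : 1; }  // leaves keep insertion order
        if (a.tokenOffset != b.tokenOffset) {                   // non-leaves by token depth
          return a.tokenOffset < b.tokenOffset ? -1 : 1;
        }
        return a.id < b.id ? -1 : 1;                            // then by unique id
      }
    });

    System.out.println(nodes);   // [branch2@0, branch3@2, leaf1@2, leaf4@1]
  }
}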
|
||||
@InterfaceAudience.Private
|
||||
public class TokenDepthComparator implements Comparator<TokenizerNode> {
|
||||
|
||||
@Override
|
||||
public int compare(TokenizerNode a, TokenizerNode b) {
|
||||
if(a==null){
|
||||
throw new IllegalArgumentException("a cannot be null");
|
||||
}
|
||||
if(b==null){
|
||||
throw new IllegalArgumentException("b cannot be null");
|
||||
}
|
||||
|
||||
// put leaves at the end
|
||||
if (!a.isLeaf() && b.isLeaf()) {
|
||||
return -1;
|
||||
}
|
||||
if (a.isLeaf() && !b.isLeaf()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (a.isLeaf() && b.isLeaf()) {// keep leaves in sorted order (for debuggability)
|
||||
return a.getId() < b.getId() ? -1 : 1;
|
||||
}
|
||||
|
||||
// compare depth
|
||||
if (a.getTokenOffset() < b.getTokenOffset()) {
|
||||
return -1;
|
||||
}
|
||||
if (a.getTokenOffset() > b.getTokenOffset()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// if same depth, return lower id first. ids are unique
|
||||
return a.getId() < b.getId() ? -1 : 1;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,239 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.tokenize;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.ArrayUtils;
|
||||
import org.apache.hadoop.hbase.util.ByteRange;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.CollectionUtils;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Data structure used in the first stage of PrefixTree encoding:
|
||||
* <li>accepts a sorted stream of ByteRanges
|
||||
* <li>splits them into a set of tokens, each held by a {@link TokenizerNode}
|
||||
* <li>connects the TokenizerNodes via standard java references
|
||||
* <li>keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content
|
||||
* <p><br>
|
||||
* Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier
|
||||
* encoding.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class Tokenizer{
|
||||
|
||||
/***************** fields **************************/
|
||||
|
||||
protected int numArraysAdded = 0;
|
||||
protected long lastNodeId = -1;
|
||||
protected ArrayList<TokenizerNode> nodes;
|
||||
protected int numNodes;
|
||||
protected TokenizerNode root;
|
||||
protected byte[] tokens;
|
||||
protected int tokensLength;
|
||||
|
||||
protected int maxElementLength = 0;
|
||||
// number of levels in the tree assuming root level is 0
|
||||
protected int treeDepth = 0;
|
||||
|
||||
|
||||
/******************* construct *******************/
|
||||
|
||||
public Tokenizer() {
|
||||
this.nodes = Lists.newArrayList();
|
||||
this.tokens = new byte[0];
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
numArraysAdded = 0;
|
||||
lastNodeId = -1;
|
||||
numNodes = 0;
|
||||
tokensLength = 0;
|
||||
root = null;
|
||||
maxElementLength = 0;
|
||||
treeDepth = 0;
|
||||
}
|
||||
|
||||
|
||||
/***************** building *************************/
|
||||
|
||||
public void addAll(ArrayList<ByteRange> sortedByteRanges) {
|
||||
for (int i = 0; i < sortedByteRanges.size(); ++i) {
|
||||
ByteRange byteRange = sortedByteRanges.get(i);
|
||||
addSorted(byteRange);
|
||||
}
|
||||
}
|
||||
|
||||
public void addSorted(final ByteRange bytes) {
|
||||
++numArraysAdded;
|
||||
if (bytes.getLength() > maxElementLength) {
|
||||
maxElementLength = bytes.getLength();
|
||||
}
|
||||
if (root == null) {
|
||||
// nodeDepth of firstNode (non-root) is 1
|
||||
root = addNode(null, 1, 0, bytes, 0);
|
||||
} else {
|
||||
root.addSorted(bytes);
|
||||
}
|
||||
}
|
||||
|
||||
public void incrementNumOccurrencesOfLatestValue(){
|
||||
CollectionUtils.getLast(nodes).incrementNumOccurrences(1);
|
||||
}
|
||||
|
||||
protected long nextNodeId() {
|
||||
return ++lastNodeId;
|
||||
}
|
||||
|
||||
protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset,
|
||||
final ByteRange token, int inputTokenOffset) {
|
||||
int inputTokenLength = token.getLength() - inputTokenOffset;
|
||||
int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset);
|
||||
TokenizerNode node = null;
|
||||
if (nodes.size() <= numNodes) {
|
||||
node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset,
|
||||
inputTokenLength);
|
||||
nodes.add(node);
|
||||
} else {
|
||||
node = nodes.get(numNodes);
|
||||
node.reset();
|
||||
node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength);
|
||||
}
|
||||
++numNodes;
|
||||
return node;
|
||||
}
|
||||
|
||||
protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) {
|
||||
int newOffset = tokensLength;
|
||||
int inputTokenLength = token.getLength() - inputTokenOffset;
|
||||
int newMinimum = tokensLength + inputTokenLength;
|
||||
tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum);
|
||||
token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength);
|
||||
tokensLength += inputTokenLength;
|
||||
return newOffset;
|
||||
}
|
||||
|
||||
protected void submitMaxNodeDepthCandidate(int nodeDepth) {
|
||||
if (nodeDepth > treeDepth) {
|
||||
treeDepth = nodeDepth;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/********************* read ********************/
|
||||
|
||||
public int getNumAdded(){
|
||||
return numArraysAdded;
|
||||
}
|
||||
|
||||
// for debugging
|
||||
public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves) {
|
||||
ArrayList<TokenizerNode> nodes = Lists.newArrayList();
|
||||
root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves);
|
||||
return nodes;
|
||||
}
|
||||
|
||||
public void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves,
|
||||
boolean includeLeaves) {
|
||||
root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
|
||||
}
|
||||
|
||||
public List<byte[]> getArrays() {
|
||||
List<TokenizerNode> nodes = new ArrayList<TokenizerNode>();
|
||||
root.appendNodesToExternalList(nodes, true, true);
|
||||
List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes));
|
||||
for (int i = 0; i < nodes.size(); ++i) {
|
||||
TokenizerNode node = nodes.get(i);
|
||||
for (int j = 0; j < node.getNumOccurrences(); ++j) {
|
||||
byte[] byteArray = node.getNewByteArray();
|
||||
byteArrays.add(byteArray);
|
||||
}
|
||||
}
|
||||
return byteArrays;
|
||||
}
|
||||
|
||||
//currently unused, but working and possibly useful in the future
|
||||
public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
|
||||
int keyLength) {
|
||||
root.getNode(resultHolder, key, keyOffset, keyLength);
|
||||
}
|
||||
|
||||
|
||||
/********************** write ***************************/
|
||||
|
||||
public Tokenizer setNodeFirstInsertionIndexes() {
|
||||
root.setInsertionIndexes(0);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Tokenizer appendOutputArrayOffsets(List<Integer> offsets) {
|
||||
root.appendOutputArrayOffsets(offsets);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/********************* print/debug ********************/
|
||||
|
||||
protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(getStructuralString());
|
||||
if (INCLUDE_FULL_TREE_IN_TO_STRING) {
|
||||
for (byte[] bytes : getArrays()) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append(Bytes.toString(bytes));
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getStructuralString() {
|
||||
List<TokenizerNode> nodes = getNodes(true, true);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (TokenizerNode node : nodes) {
|
||||
String line = node.getPaddedTokenAndOccurrenceString();
|
||||
sb.append(line + "\n");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
/****************** get/set ************************/
|
||||
|
||||
public TokenizerNode getRoot() {
|
||||
return root;
|
||||
}
|
||||
|
||||
public int getMaxElementLength() {
|
||||
return maxElementLength;
|
||||
}
|
||||
|
||||
public int getTreeDepth() {
|
||||
return treeDepth;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,632 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hbase.codec.prefixtree.encode.tokenize;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.ByteRange;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.CollectionUtils;
|
||||
import org.apache.hadoop.hbase.util.Strings;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Individual node in a Trie structure. Each node is one of 3 types:
|
||||
* <li>Branch: an internal trie node that may have a token and must have multiple children, but does
|
||||
* not represent an actual input byte[], hence its numOccurrences is 0
|
||||
* <li>Leaf: a node with no children and where numOccurrences is >= 1. Its token represents the
|
||||
* last bytes in the input byte[]s.
|
||||
* <li>Nub: a combination of a branch and leaf. Its token represents the last bytes of input
|
||||
* byte[]s and has numOccurrences >= 1, but it also has child nodes which represent input byte[]s
|
||||
* that add bytes to this node's input byte[].
|
||||
* <br/><br/>
|
||||
* Example inputs (numInputs=7):
|
||||
* 0: AAA
|
||||
* 1: AAA
|
||||
* 2: AAB
|
||||
* 3: AAB
|
||||
* 4: AAB
|
||||
* 5: AABQQ
|
||||
* 6: AABQQ
|
||||
* <br/><br/>
|
||||
* Resulting TokenizerNodes:
|
||||
* AA <- branch, numOccurrences=0, tokenStartOffset=0, token.length=2
|
||||
* A <- leaf, numOccurrences=2, tokenStartOffset=2, token.length=1
|
||||
* B <- nub, numOccurrences=3, tokenStartOffset=2, token.length=1
|
||||
* QQ <- leaf, numOccurrences=2, tokenStartOffset=3, token.length=2
|
||||
* <br/><br/>
|
||||
* numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2
|
||||
*/
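A hedged, self-contained re-creation of the example above using Strings in place of ByteRanges. It follows the same three cases handled by addSorted()/split() below (duplicate, leaf-to-nub, split into branch plus two leaves) and prints the expected structure.

import java.util.ArrayList;
import java.util.List;

public class TokenizerExampleSketch {

  static class Node {
    String token;               // bytes owned by this node
    int numOccurrences;         // 0 => branch, >= 1 => nub or leaf
    List<Node> children = new ArrayList<Node>();
    Node(String token, int numOccurrences) {
      this.token = token; this.numOccurrences = numOccurrences;
    }
  }

  /** Add one input below `node`; inputs must arrive in sorted order. `tail` starts at node's token. */
  static void addSorted(Node node, String tail) {
    // recurse into the most recently added child while this node's token still matches
    if (tail.startsWith(node.token) && !node.children.isEmpty()) {
      Node lastChild = node.children.get(node.children.size() - 1);
      String childTail = tail.substring(node.token.length());
      if (commonPrefixLen(lastChild.token, childTail) > 0) {
        addSorted(lastChild, childTail);
        return;
      }
    }
    int common = commonPrefixLen(node.token, tail);
    String remainder = tail.substring(common);
    if (common == node.token.length()) {
      if (remainder.isEmpty()) {
        node.numOccurrences++;                            // case 1: identical input
      } else {
        node.children.add(new Node(remainder, 1));        // case 2: leaf -> nub, add child leaf
      }
    } else {                                              // case 3: split into branch + two leaves
      Node first = new Node(node.token.substring(common), node.numOccurrences);
      first.children.addAll(node.children);
      node.children.clear();
      node.token = node.token.substring(0, common);
      node.numOccurrences = 0;
      node.children.add(first);
      node.children.add(new Node(remainder, 1));
    }
  }

  static int commonPrefixLen(String a, String b) {
    int i = 0;
    while (i < a.length() && i < b.length() && a.charAt(i) == b.charAt(i)) {
      ++i;
    }
    return i;
  }

  static void print(Node node, String indent) {
    System.out.println(indent + "'" + node.token + "' x" + node.numOccurrences);
    for (Node child : node.children) {
      print(child, indent + "  ");
    }
  }

  public static void main(String[] args) {
    String[] inputs = {"AAA", "AAA", "AAB", "AAB", "AAB", "AABQQ", "AABQQ"};
    Node root = new Node(inputs[0], 1);
    for (int i = 1; i < inputs.length; ++i) {
      addSorted(root, inputs[i]);
    }
    print(root, "");
    // Expected output, matching the comment above:
    //   'AA' x0
    //     'A' x2
    //     'B' x3
    //       'QQ' x2
  }
}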
|
||||
@InterfaceAudience.Private
|
||||
public class TokenizerNode{
|
||||
|
||||
/*
|
||||
* Ref to data structure wrapper
|
||||
*/
|
||||
protected Tokenizer builder;
|
||||
|
||||
/******************************************************************
|
||||
* Tree content/structure used during tokenization
|
||||
* ****************************************************************/
|
||||
|
||||
/*
|
||||
* ref to parent trie node
|
||||
*/
|
||||
protected TokenizerNode parent;
|
||||
|
||||
/*
|
||||
* node depth in trie, irrespective of each node's token length
|
||||
*/
|
||||
protected int nodeDepth;
|
||||
|
||||
/*
|
||||
* start index of this token in original byte[]
|
||||
*/
|
||||
protected int tokenStartOffset;
|
||||
|
||||
/*
|
||||
* bytes for this trie node. can be length 0 in root node
|
||||
*/
|
||||
protected ByteRange token;
|
||||
|
||||
/*
|
||||
* A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for
|
||||
* nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing
|
||||
* that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode.
|
||||
*/
|
||||
protected int numOccurrences;
|
||||
|
||||
/*
|
||||
* The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256
|
||||
* child nodes.
|
||||
*/
|
||||
protected ArrayList<TokenizerNode> children;
|
||||
|
||||
|
||||
/*
|
||||
* Fields used later in the encoding process for sorting the nodes into the order they'll be
|
||||
* written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer
|
||||
* are not generic data structures but instead are specific to HBase PrefixTree encoding.
|
||||
*/
|
||||
|
||||
/*
|
||||
* unique id assigned to each TokenizerNode
|
||||
*/
|
||||
protected long id;
|
||||
|
||||
/*
|
||||
* set >=0 for nubs and leaves
|
||||
*/
|
||||
protected int firstInsertionIndex = -1;
|
||||
|
||||
/*
|
||||
* A positive value indicating how many bytes before the end of the block this node will start. If
|
||||
* the section is 55 bytes and negativeIndex is 9, then the node will start at 46.
|
||||
*/
|
||||
protected int negativeIndex = 0;
|
||||
|
||||
/*
|
||||
* The offset in the output array at which to start writing this node's token bytes. Influenced
|
||||
* by the lengths of all tokens sorted before this one.
|
||||
*/
|
||||
protected int outputArrayOffset = -1;
|
||||
|
||||
|
||||
/*********************** construct *****************************/
|
||||
|
||||
public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
|
||||
int tokenStartOffset, int tokenOffset, int tokenLength) {
|
||||
this.token = new ByteRange();
|
||||
reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
|
||||
this.children = Lists.newArrayList();
|
||||
}
|
||||
|
||||
/*
|
||||
* Sub-constructor for initializing all fields without allocating a new object. Used by the
|
||||
* regular constructor.
|
||||
*/
|
||||
public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
|
||||
int tokenStartOffset, int tokenOffset, int tokenLength) {
|
||||
this.builder = builder;
|
||||
this.id = builder.nextNodeId();
|
||||
this.parent = parent;
|
||||
this.nodeDepth = nodeDepth;
|
||||
builder.submitMaxNodeDepthCandidate(nodeDepth);
|
||||
this.tokenStartOffset = tokenStartOffset;
|
||||
this.token.set(builder.tokens, tokenOffset, tokenLength);
|
||||
this.numOccurrences = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the state of this node so that it looks like it was just allocated.
|
||||
*/
|
||||
public void reset() {
|
||||
builder = null;
|
||||
parent = null;
|
||||
nodeDepth = 0;
|
||||
tokenStartOffset = 0;
|
||||
token.clear();
|
||||
numOccurrences = 0;
|
||||
children.clear();// branches & nubs
|
||||
|
||||
// ids/offsets. used during writing to byte[]
|
||||
id = 0;
|
||||
firstInsertionIndex = -1;// set >=0 for nubs and leaves
|
||||
negativeIndex = 0;
|
||||
outputArrayOffset = -1;
|
||||
}
|
||||
|
||||
|
||||
/************************* building *********************************/
|
||||
|
||||
/*
|
||||
* <li>Only public method used during the tokenization process
|
||||
* <li>Requires that the input ByteRange sort after the previous, and therefore after all previous
|
||||
* inputs
|
||||
* <li>Only looks at bytes of the input array that align with this node's token
|
||||
*/
|
||||
public void addSorted(final ByteRange bytes) {// recursively build the tree
|
||||
|
||||
/*
|
||||
* Recurse deeper into the existing trie structure
|
||||
*/
|
||||
if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
|
||||
TokenizerNode lastChild = CollectionUtils.getLast(children);
|
||||
if (lastChild.partiallyMatchesToken(bytes)) {
|
||||
lastChild.addSorted(bytes);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Recursion ended. We must either
|
||||
* <li>1: increment numOccurrences if this input was equal to the previous
|
||||
* <li>2: convert this node from a leaf to a nub, and add a new child leaf
|
||||
* <li>3: split this node into a branch and leaf, and then add a second leaf
|
||||
*/
|
||||
|
||||
// add it as a child of this node
|
||||
int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length
|
||||
int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
|
||||
int tailLength = bytes.getLength() - tailOffset;
|
||||
|
||||
if (numIdenticalTokenBytes == token.getLength()) {
|
||||
if (tailLength == 0) {// identical to this node (case 1)
|
||||
incrementNumOccurrences(1);
|
||||
} else {// identical to this node, but with a few extra trailing bytes. (leaf -> nub) (case 2)
|
||||
int childNodeDepth = nodeDepth + 1;
|
||||
int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
|
||||
TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
|
||||
bytes, tailOffset);
|
||||
addChild(newChildNode);
|
||||
}
|
||||
} else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3)
|
||||
split(numIdenticalTokenBytes, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void addChild(TokenizerNode node) {
|
||||
node.setParent(this);
|
||||
children.add(node);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the
|
||||
* method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output
|
||||
* will be 3 nodes:<br/>
|
||||
* <li>1: B <- branch
|
||||
* <li>2: AA <- leaf
|
||||
* <li>3: OO <- leaf
|
||||
*
|
||||
* @param numTokenBytesToRetain => 1 (the B)
|
||||
* @param bytes => BOO
|
||||
*/
|
||||
protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
|
||||
int childNodeDepth = nodeDepth;
|
||||
int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;
|
||||
|
||||
//create leaf AA
|
||||
TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
|
||||
token, numTokenBytesToRetain);
|
||||
firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences
|
||||
token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B
|
||||
numOccurrences = 0;//current node is now a branch
|
||||
|
||||
moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B)
|
||||
addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children
|
||||
|
||||
//create leaf OO
|
||||
TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
|
||||
bytes, tokenStartOffset + numTokenBytesToRetain);
|
||||
addChild(secondChild);//add the new leaf (OO) to the branch's (B's) children
|
||||
|
||||
// we inserted branch node B as a new level above/before the two children, so increment the
|
||||
// depths of the children below
|
||||
firstChild.incrementNodeDepthRecursively();
|
||||
secondChild.incrementNodeDepthRecursively();
|
||||
}
|
||||
|
||||
|
||||
protected void incrementNodeDepthRecursively() {
|
||||
++nodeDepth;
|
||||
builder.submitMaxNodeDepthCandidate(nodeDepth);
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
children.get(i).incrementNodeDepthRecursively();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
child.setParent(newParent);
|
||||
newParent.children.add(child);
|
||||
}
|
||||
children.clear();
|
||||
}
|
||||
|
||||
|
||||
/************************ byte[] utils *************************/
|
||||
|
||||
  protected boolean partiallyMatchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) > 0;
  }

  protected boolean matchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) == getTokenLength();
  }

  protected int numIdenticalBytes(ByteRange bytes) {
    return token.numEqualPrefixBytes(bytes, tokenStartOffset);
  }


  /***************** moving nodes around ************************/

  public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
      boolean includeLeaves) {
    if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
      appendTo.add(this);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
    }
  }

  public int setInsertionIndexes(int nextIndex) {
    int newNextIndex = nextIndex;
    if (hasOccurrences()) {
      setFirstInsertionIndex(nextIndex);
      newNextIndex += numOccurrences;
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      newNextIndex = child.setInsertionIndexes(newNextIndex);
    }
    return newNextIndex;
  }

  public void appendOutputArrayOffsets(List<Integer> offsets) {
    if (hasOccurrences()) {
      offsets.add(outputArrayOffset);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendOutputArrayOffsets(offsets);
    }
  }


  /***************** searching *********************************/

  /*
   * Do a trie style search through the tokenizer. One option for looking up families or qualifiers
   * during encoding, but currently unused in favor of tracking this information as they are added.
   *
   * Keeping code pending further performance testing.
   */
  public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
      int keyLength) {
    int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();

    // quick check if the key is shorter than this node (may not work for binary search)
    if (CollectionUtils.isEmpty(children)) {
      if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    // all token bytes must match
    for (int i = 0; i < token.getLength(); ++i) {
      if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
        // TODO return whether it's before or after so we can binary search
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
      resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH
      return;
    }

    if (CollectionUtils.notEmpty(children)) {
      // TODO binary search the children
      for (int i = 0; i < children.size(); ++i) {
        TokenizerNode child = children.get(i);
        child.getNode(resultHolder, key, keyOffset, keyLength);
        if (resultHolder.isMatch()) {
          return;
        } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
          // passed it, so it doesn't exist
          resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
          return;
        }
        // key is still AFTER the current node, so continue searching
      }
    }

    // checked all children (or there were no children), and didn't find it
    resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
    return;
  }


  /****************** writing back to byte[]'s *************************/

  public byte[] getNewByteArray() {
    byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
    fillInBytes(arrayToFill);
    return arrayToFill;
  }

  public void fillInBytes(byte[] arrayToFill) {
    for (int i = 0; i < token.getLength(); ++i) {
      arrayToFill[tokenStartOffset + i] = token.get(i);
    }
    if (parent != null) {
      parent.fillInBytes(arrayToFill);
    }
  }


  /************************** printing ***********************/

  @Override
  public String toString() {
    String s = "";
    if (parent == null) {
      s += "R ";
    } else {
      s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
    }
    s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
    if (numOccurrences > 0) {
      s += "x" + numOccurrences;
    }
    return s;
  }

  public String getPaddedTokenAndOccurrenceString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getBnlIndicator(true));
    sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
    sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
    if (outputArrayOffset >= 0) {
      sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
    }
    sb.append(" ");
    for (int i = 0; i < tokenStartOffset; ++i) {
      sb.append(" ");
    }
    sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
    return sb.toString();
  }

  public String getBnlIndicator(boolean indent) {
    if (indent) {
      if (isNub()) {
        return " N ";
      }
      return isBranch() ? "B " : " L";
    }
    if (isNub()) {
      return "N";
    }
    return isBranch() ? "B" : "L";
  }


  /********************** count different node types ********************/

  public int getNumBranchNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isBranch() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  public int getNumNubNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isNub() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  public int getNumLeafNodesIncludingThisNode() {
    if (isLeaf()) {
      return 1;
    }
    int totalFromChildren = 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromChildren += child.getNumLeafNodesIncludingThisNode();
    }
    return totalFromChildren;
  }


  /*********************** simple read-only methods *******************************/

  public int getNodeDepth() {
    return nodeDepth;
  }

  public int getTokenLength() {
    return token.getLength();
  }

  public boolean hasOccurrences() {
    return numOccurrences > 0;
  }

  public boolean isRoot() {
    return this.parent == null;
  }

  public int getNumChildren() {
    return CollectionUtils.nullSafeSize(children);
  }

  public TokenizerNode getLastChild() {
    if (CollectionUtils.isEmpty(children)) {
      return null;
    }
    return CollectionUtils.getLast(children);
  }

  public boolean isLeaf() {
    return CollectionUtils.isEmpty(children) && hasOccurrences();
  }

  public boolean isBranch() {
    return CollectionUtils.notEmpty(children) && !hasOccurrences();
  }

  public boolean isNub() {
    return CollectionUtils.notEmpty(children) && hasOccurrences();
  }


  /********************** simple mutation methods *************************/

  /**
   * Each occurrence > 1 indicates a repeat of the previous entry. This can be called directly by
   * an external class without going through the process of detecting a repeat if it is a known
   * repeat by some external mechanism. PtEncoder uses this when adding cells to a row if it knows
   * the new cells are part of the current row.
   * @param d increment by this amount
   */
  public void incrementNumOccurrences(int d) {
    numOccurrences += d;
  }


  /************************* autogenerated get/set ******************/

  public int getTokenOffset() {
    return tokenStartOffset;
  }

  public TokenizerNode getParent() {
    return parent;
  }

  public ByteRange getToken() {
    return token;
  }

  public int getNumOccurrences() {
    return numOccurrences;
  }

  public void setParent(TokenizerNode parent) {
    this.parent = parent;
  }

  public void setNumOccurrences(int numOccurrences) {
    this.numOccurrences = numOccurrences;
  }

  public ArrayList<TokenizerNode> getChildren() {
    return children;
  }

  public long getId() {
    return id;
  }

  public int getFirstInsertionIndex() {
    return firstInsertionIndex;
  }

  public void setFirstInsertionIndex(int firstInsertionIndex) {
    this.firstInsertionIndex = firstInsertionIndex;
  }

  public int getNegativeIndex() {
    return negativeIndex;
  }

  public void setNegativeIndex(int negativeIndex) {
    this.negativeIndex = negativeIndex;
  }

  public int getOutputArrayOffset() {
    return outputArrayOffset;
  }

  public void setOutputArrayOffset(int outputArrayOffset) {
    this.outputArrayOffset = outputArrayOffset;
  }

  public void setId(long id) {
    this.id = id;
  }

  public void setBuilder(Tokenizer builder) {
    this.builder = builder;
  }

  public void setTokenOffset(int tokenOffset) {
    this.tokenStartOffset = tokenOffset;
  }

  public void setToken(ByteRange token) {
    this.token = token;
  }

}
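/*
 * Illustrative sketch (not part of this patch): how the node types above fall out of a tiny sorted
 * input set. It only uses calls that the builder tests in this commit already exercise
 * (Tokenizer.addSorted, getRoot, and the node counters); the expected values in comments are the
 * editor's reading of the code, not asserted output.
 */
class TokenizerNodeTypesSketch {
  public static void main(String[] args) {
    Tokenizer builder = new Tokenizer();
    for (String s : new String[] { "a", "aa", "ab" }) {
      builder.addSorted(new org.apache.hadoop.hbase.util.ByteRange(
          org.apache.hadoop.hbase.util.Bytes.toBytes(s)));
    }
    TokenizerNode root = builder.getRoot();
    System.out.println(root.isNub());// true: "a" occurs itself and has children
    System.out.println(root.getNumLeafNodesIncludingThisNode());// 2: the leaves for "aa" and "ab"
    System.out.println(root.getNumNubNodesIncludingThisNode());// 1: the root itself
  }
}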
@@ -0,0 +1,38 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.encode.tokenize;

import org.apache.hadoop.classification.InterfaceAudience;

/**
 * Warning: currently unused, but code is valid. Pending performance testing on more data sets.
 *
 * Where is the key relative to our current position in the tree. For example, the current tree
 * node is "BEFORE" the key we are seeking.
 */
@InterfaceAudience.Private
public enum TokenizerRowSearchPosition {

  AFTER,// the key is after this tree node, so keep searching
  BEFORE,// in a binary search, this tells us to back up
  MATCH,// the current node is a full match
  NO_MATCH,// might as well return a value more informative than null

}
@@ -0,0 +1,73 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.encode.tokenize;

import org.apache.hadoop.classification.InterfaceAudience;

/**
 * For recursively searching a PtBuilder.
 */
@InterfaceAudience.Private
public class TokenizerRowSearchResult {

  /************ fields ************************/

  protected TokenizerRowSearchPosition difference;
  protected TokenizerNode matchingNode;


  /*************** construct *****************/

  public TokenizerRowSearchResult() {
  }

  public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) {
    this.difference = difference;
  }

  public TokenizerRowSearchResult(TokenizerNode matchingNode) {
    this.difference = TokenizerRowSearchPosition.MATCH;
    this.matchingNode = matchingNode;
  }


  /*************** methods **********************/

  public boolean isMatch() {
    return TokenizerRowSearchPosition.MATCH == difference;
  }


  /************* get/set ***************************/

  public TokenizerRowSearchPosition getDifference() {
    return difference;
  }

  public TokenizerNode getMatchingNode() {
    return matchingNode;
  }

  public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) {
    this.difference = difference;
    this.matchingNode = matchingNode;
  }

}
@@ -0,0 +1,71 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.scanner;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.Cell;

/**
 * Alternate name may be CellInputStream
 * <p/>
 * An interface for iterating through a sequence of cells. Similar to Java's Iterator, but without
 * the hasNext() or remove() methods. The hasNext() method is problematic because it may require
 * actually loading the next object, which in turn requires storing the previous object somewhere.
 * The core data block decoder should be as fast as possible, so we push the complexity and
 * performance expense of concurrently tracking multiple cells to layers above the CellScanner.
 * <p/>
 * The getCurrent() method will return a reference to a Cell implementation. This reference may
 * or may not point to a reusable cell implementation, so users of the CellScanner should not, for
 * example, accumulate a List of Cells. All of the references may point to the same object, which
 * would be the latest state of the underlying Cell. In short, the Cell is mutable.
 * <p/>
 * At a minimum, an implementation will need to be able to advance from one cell to the next in a
 * LinkedList fashion. The nextQualifier(), nextFamily(), and nextRow() methods can all be
 * implemented by calling nextCell(), however, if the DataBlockEncoding supports random access into
 * the block then it may provide smarter versions of these methods.
 * <p/>
 * Typical usage:
 *
 * <pre>
 * while (scanner.next()) {
 *   Cell cell = scanner.getCurrent();
 *   // do something
 * }
 * </pre>
 */
@InterfaceAudience.Private
public interface CellScanner {

  /**
   * Reset any state in the scanner so it appears it was freshly opened.
   */
  void resetToBeforeFirstEntry();

  /**
   * @return the current Cell which may be mutable
   */
  Cell getCurrent();

  /**
   * Advance the scanner 1 cell.
   * @return true if the next cell is found and getCurrent() will return a valid Cell
   */
  boolean next();

}
@@ -0,0 +1,107 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.scanner;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellScannerPosition;

/**
 * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that
 * the implementation is able to navigate between cells without iterating through every cell.
 */
@InterfaceAudience.Private
public interface CellSearcher extends ReversibleCellScanner {

  /**
   * Do everything within this scanner's power to find the key. Look forward and backwards.
   * <p/>
   * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state.
   * <p/>
   * @param key position the CellScanner exactly on this key
   * @return true if the cell existed and getCurrent() holds a valid cell
   */
  boolean positionAt(Cell key);

  /**
   * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no
   * exact match.
   * <p/>
   * @param key position the CellScanner on this key or the closest cell before
   * @return AT if exact match<br/>
   *         BEFORE if on last cell before key<br/>
   *         BEFORE_FIRST if key was before the first cell in this scanner's scope
   */
  CellScannerPosition positionAtOrBefore(Cell key);

  /**
   * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact
   * match.
   * <p/>
   * @param key position the CellScanner on this key or the closest cell after
   * @return AT if exact match<br/>
   *         AFTER if on first cell after key<br/>
   *         AFTER_LAST if key was after the last cell in this scanner's scope
   */
  CellScannerPosition positionAtOrAfter(Cell key);

  /**
   * Note: Added for backwards compatibility with
   * {@link org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek}
   * <p/>
   * Look for the key, but only look after the current position. Probably not needed for an
   * efficient tree implementation, but is important for implementations without random access such
   * as unencoded KeyValue blocks.
   * <p/>
   * @param key position the CellScanner exactly on this key
   * @return true if getCurrent() holds a valid cell
   */
  boolean seekForwardTo(Cell key);

  /**
   * Same as seekForwardTo(..), but go to the extra effort of finding the previous key if there's
   * no exact match.
   * <p/>
   * @param key
   * @return AT if exact match<br/>
   *         BEFORE if on last cell before key<br/>
   *         BEFORE_FIRST if key was before the first cell in this scanner's scope
   */
  CellScannerPosition seekForwardToOrBefore(Cell key);

  /**
   * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no
   * exact match.
   * <p/>
   * @param key
   * @return AT if exact match<br/>
   *         AFTER if on first cell after key<br/>
   *         AFTER_LAST if key was after the last cell in this scanner's scope
   */
  CellScannerPosition seekForwardToOrAfter(Cell key);

  /**
   * Note: This may not be appropriate to have in the interface. Need to investigate.
   * <p/>
   * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST.
   * This is used by tests and for handling certain edge cases.
   */
  void positionAfterLastCell();

}
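/*
 * Illustrative sketch (not part of this patch): the call pattern a caller might use with a
 * CellSearcher when scanning from a start key. The searcher instance and the AT/AFTER enum
 * constants are assumptions taken from the javadoc above, not code introduced by this commit.
 */
class CellSearcherUsageSketch {
  static int countCellsFrom(CellSearcher searcher, org.apache.hbase.Cell startKey) {
    int count = 0;
    org.apache.hbase.cell.CellScannerPosition pos = searcher.positionAtOrAfter(startKey);
    if (pos == org.apache.hbase.cell.CellScannerPosition.AT
        || pos == org.apache.hbase.cell.CellScannerPosition.AFTER) {
      do {
        if (searcher.getCurrent() != null) {// may be a reused, mutable Cell
          ++count;
        }
      } while (searcher.next());
    }
    return count;
  }
}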
@@ -0,0 +1,52 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.scanner;

import org.apache.hadoop.classification.InterfaceAudience;

/**
 * An extension of CellScanner indicating the scanner supports iterating backwards through cells.
 * <p>
 * Note: This was not added to suggest that HBase should support client facing reverse Scanners,
 * but because some {@link CellSearcher} implementations, namely PrefixTree, need a method of
 * backing up if the positionAt(..) method goes past the requested cell.
 */
@InterfaceAudience.Private
public interface ReversibleCellScanner extends CellScanner {

  /**
   * Try to position the scanner one Cell before the current position.
   * @return true if the operation was successful, meaning getCurrent() will return a valid
   *         Cell.<br/>
   *         false if there were no previous cells, meaning getCurrent() will return null.
   *         Scanner position will be {@link org.apache.hbase.cell.CellScannerPosition#BEFORE_FIRST}
   */
  boolean previous();

  /**
   * Try to position the scanner in the row before the current row.
   * @param endOfRow true for the last cell in the previous row; false for the first cell
   * @return true if the operation was successful, meaning getCurrent() will return a valid
   *         Cell.<br/>
   *         false if there were no previous cells, meaning getCurrent() will return null.
   *         Scanner position will be {@link org.apache.hbase.cell.CellScannerPosition#BEFORE_FIRST}
   */
  boolean previousRow(boolean endOfRow);

}
@@ -0,0 +1,180 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.util.byterange;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Lists;

/**
 * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
 * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
 * <p>
 * Current implementations are {@link org.apache.hbase.util.byterange.impl.ByteRangeHashSet} and
 * {@link org.apache.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
 * trie-oriented ByteRangeTrieSet, etc.
 */
@InterfaceAudience.Private
public abstract class ByteRangeSet {

  /******************** fields **********************/

  protected byte[] byteAppender;
  protected int numBytes;

  protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;

  protected ArrayList<ByteRange> uniqueRanges;
  protected int numUniqueRanges = 0;

  protected int[] uniqueRangeIndexByInsertionId;
  protected int numInputs;

  protected List<Integer> sortedIndexByUniqueIndex;
  protected int[] sortedIndexByInsertionId;
  protected ArrayList<ByteRange> sortedRanges;


  /****************** construct **********************/

  protected ByteRangeSet() {
    this.byteAppender = new byte[0];
    this.uniqueRanges = Lists.newArrayList();
    this.uniqueRangeIndexByInsertionId = new int[0];
    this.sortedIndexByUniqueIndex = Lists.newArrayList();
    this.sortedIndexByInsertionId = new int[0];
    this.sortedRanges = Lists.newArrayList();
  }

  public void reset() {
    numBytes = 0;
    uniqueIndexByUniqueRange.clear();
    numUniqueRanges = 0;
    numInputs = 0;
    sortedIndexByUniqueIndex.clear();
    sortedRanges.clear();
  }


  /*************** abstract *************************/

  public abstract void addToSortedRanges();


  /**************** methods *************************/

  /**
   * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and
   * insert it into the tracking Map uniqueIndexByUniqueRange.
   */
  public void add(ByteRange bytes) {
    Integer index = uniqueIndexByUniqueRange.get(bytes);
    if (index == null) {
      index = store(bytes);
    }
    int minLength = numInputs + 1;
    uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
      minLength, 2 * minLength);
    uniqueRangeIndexByInsertionId[numInputs] = index;
    ++numInputs;
  }

  protected int store(ByteRange bytes) {
    int indexOfNewElement = numUniqueRanges;
    if (uniqueRanges.size() <= numUniqueRanges) {
      uniqueRanges.add(new ByteRange());
    }
    ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
    int neededBytes = numBytes + bytes.getLength();
    byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
    bytes.deepCopyTo(byteAppender, numBytes);
    storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
    numBytes += bytes.getLength();
    uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
    int newestUniqueIndex = numUniqueRanges;
    ++numUniqueRanges;
    return newestUniqueIndex;
  }

  public ByteRangeSet compile() {
    addToSortedRanges();
    for (int i = 0; i < sortedRanges.size(); ++i) {
      sortedIndexByUniqueIndex.add(null);// need to grow the size
    }
    // TODO move this to an invert(int[]) util method
    for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
      int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
      sortedIndexByUniqueIndex.set(uniqueIndex, i);
    }
    sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
      numInputs);
    for (int i = 0; i < numInputs; ++i) {
      int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
      int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
      sortedIndexByInsertionId[i] = sortedIndex;
    }
    return this;
  }

  public int getSortedIndexForInsertionId(int insertionId) {
    return sortedIndexByInsertionId[insertionId];
  }

  public int size() {
    return uniqueIndexByUniqueRange.size();
  }


  /***************** standard methods ************************/

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    int i = 0;
    for (ByteRange r : sortedRanges) {
      if (i > 0) {
        sb.append("\n");
      }
      sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
      ++i;
    }
    sb.append("\ntotalSize:" + numBytes);
    sb.append("\navgSize:" + getAvgSize());
    return sb.toString();
  }


  /**************** get/set *****************************/

  public ArrayList<ByteRange> getSortedRanges() {
    return sortedRanges;
  }

  public long getAvgSize() {
    return numBytes / numUniqueRanges;
  }

}
@@ -0,0 +1,57 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.util.byterange.impl;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hbase.util.byterange.ByteRangeSet;

/**
 * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap
 * produces garbage when adding a new element to it. We can probably create a tighter
 * implementation without pointers or garbage.
 */
@InterfaceAudience.Private
public class ByteRangeHashSet extends ByteRangeSet {

  /************************ constructors *****************************/

  public ByteRangeHashSet() {
    this.uniqueIndexByUniqueRange = new HashMap<ByteRange, Integer>();
  }

  public ByteRangeHashSet(List<ByteRange> rawByteArrays) {
    this();// needed to initialize the HashMap before adding
    for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
      add(in);
    }
  }

  @Override
  public void addToSortedRanges() {
    sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
    Collections.sort(sortedRanges);
  }

}
@@ -0,0 +1,54 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.util.byterange.impl;

import java.util.List;
import java.util.TreeMap;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hbase.util.byterange.ByteRangeSet;

/**
 * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet.
 */
@InterfaceAudience.Private
public class ByteRangeTreeSet extends ByteRangeSet {

  /************************ constructors *****************************/

  public ByteRangeTreeSet() {
    this.uniqueIndexByUniqueRange = new TreeMap<ByteRange, Integer>();
  }

  public ByteRangeTreeSet(List<ByteRange> rawByteArrays) {
    this();// needed to initialize the TreeMap
    for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
      add(in);
    }
  }

  @Override
  public void addToSortedRanges() {
    sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
  }

}
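/*
 * Illustrative sketch (not part of this patch): de-duping three inputs with the TreeMap-backed
 * implementation above and mapping each insertion back to its sorted index. The expected values in
 * comments follow the editor's reading of compile()'s index inversion and are not asserted by this
 * commit.
 */
class ByteRangeSetUsageSketch {
  public static void main(String[] args) {
    org.apache.hbase.util.byterange.ByteRangeSet set = new ByteRangeTreeSet();
    set.add(new org.apache.hadoop.hbase.util.ByteRange(
        org.apache.hadoop.hbase.util.Bytes.toBytes("beta")));
    set.add(new org.apache.hadoop.hbase.util.ByteRange(
        org.apache.hadoop.hbase.util.Bytes.toBytes("alpha")));
    set.add(new org.apache.hadoop.hbase.util.ByteRange(
        org.apache.hadoop.hbase.util.Bytes.toBytes("beta")));// duplicate, stored once
    set.compile();
    System.out.println(set.size());// 2 unique ranges
    System.out.println(set.getSortedIndexForInsertionId(0));// 1: "beta" sorts after "alpha"
    System.out.println(set.getSortedIndexForInsertionId(1));// 0: "alpha" is first in sorted order
    System.out.println(set.getSortedIndexForInsertionId(2));// 1: the duplicate maps to the same slot
  }
}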
@@ -0,0 +1,116 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.util.vint;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.classification.InterfaceAudience;

/**
 * UFInt is an abbreviation for Unsigned Fixed-width Integer.
 *
 * This class converts between positive ints and 1-4 bytes that represent the int. All input ints
 * must be positive. Max values stored in N bytes are:
 *
 * N=1: 2^8 - 1  => 255
 * N=2: 2^16 - 1 => 65,535
 * N=3: 2^24 - 1 => 16,777,215
 * N=4: 2^31 - 1 => 2,147,483,647 (Integer.MAX_VALUE)
 *
 * This was created to get most of the memory savings of a variable length integer when encoding
 * an array of input integers, but to fix the number of bytes for each integer to the number needed
 * to store the maximum integer in the array. This enables a binary search to be performed on the
 * array of encoded integers.
 *
 * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if
 * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
 * numbers will also require 2 bytes.
 *
 * warnings:
 *  * no input validation for max performance
 *  * no negatives
 */
@InterfaceAudience.Private
public class UFIntTool {

  private static final int NUM_BITS_IN_LONG = 64;

  public static long maxValueForNumBytes(int numBytes) {
    return (1L << (numBytes * 8)) - 1;
  }

  public static int numBytes(final long value) {
    if (value == 0) {// 0 doesn't work with the formula below
      return 1;
    }
    return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8;
  }

  public static byte[] getBytes(int outputWidth, final long value) {
    byte[] bytes = new byte[outputWidth];
    writeBytes(outputWidth, value, bytes, 0);
    return bytes;
  }

  public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
    bytes[offset + outputWidth - 1] = (byte) value;
    for (int i = outputWidth - 2; i >= 0; --i) {
      bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8);
    }
  }

  private static final long[] MASKS = new long[] {
    (long) 255,
    (long) 255 << 8,
    (long) 255 << 16,
    (long) 255 << 24,
    (long) 255 << 32,
    (long) 255 << 40,
    (long) 255 << 48,
    (long) 255 << 56
  };

  public static void writeBytes(int outputWidth, final long value, OutputStream os)
      throws IOException {
    for (int i = outputWidth - 1; i >= 0; --i) {
      os.write((byte) ((value & MASKS[i]) >>> (8 * i)));
    }
  }

  public static long fromBytes(final byte[] bytes) {
    long value = 0;
    value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int
    for (int i = 1; i < bytes.length; ++i) {
      value <<= 8;
      value |= bytes[i] & 0xff;
    }
    return value;
  }

  public static long fromBytes(final byte[] bytes, final int offset, final int width) {
    long value = 0;
    value |= bytes[0 + offset] & 0xff;// these seem to do ok without casting the byte to int
    for (int i = 1; i < width; ++i) {
      value <<= 8;
      value |= bytes[i + offset] & 0xff;
    }
    return value;
  }

}
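/*
 * Illustrative sketch (not part of this patch): the fixed-width round trip described in the class
 * comment. Offsets 3, 300 and 70,000 all fit in 3 bytes, so each can be written in exactly 3 bytes
 * and the resulting array remains binary-searchable.
 */
class UFIntToolSketch {
  public static void main(String[] args) {
    int width = UFIntTool.numBytes(70000);// 3
    byte[] encoded = UFIntTool.getBytes(width, 300);// {0x00, 0x01, 0x2C}
    long decoded = UFIntTool.fromBytes(encoded, 0, width);// 300
    System.out.println(width + " " + decoded);
  }
}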
@@ -0,0 +1,115 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.util.vint;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.classification.InterfaceAudience;

/**
 * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left
 * bit of the current byte is 1, then there is at least one more byte.
 */
@InterfaceAudience.Private
public class UVIntTool {

  public static final byte
    BYTE_7_RIGHT_BITS_SET = 127,
    BYTE_LEFT_BIT_SET = -128;

  public static final long
    INT_7_RIGHT_BITS_SET = 127,
    INT_8TH_BIT_SET = 128;

  public static final byte[]
    MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 };

  /********************* int -> bytes **************************/

  public static int numBytes(int in) {
    if (in == 0) {
      // doesn't work with the formula below
      return 1;
    }
    return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1)
  }

  public static byte[] getBytes(int value) {
    int numBytes = numBytes(value);
    byte[] bytes = new byte[numBytes];
    int remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET);
      remainder >>= 7;
    }
    // do not set the left bit
    bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET);
    return bytes;
  }

  public static int writeBytes(int value, OutputStream os) throws IOException {
    int numBytes = numBytes(value);
    int remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET));
      remainder >>= 7;
    }
    // do not set the left bit
    os.write((byte) (remainder & INT_7_RIGHT_BITS_SET));
    return numBytes;
  }

  /******************** bytes -> int **************************/

  public static int getInt(byte[] bytes) {
    return getInt(bytes, 0);
  }

  public static int getInt(byte[] bytes, int offset) {
    int value = 0;
    for (int i = 0;; ++i) {
      byte b = bytes[offset + i];
      int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      if (b >= 0) {
        break;
      }
    }
    return value;
  }

  public static int getInt(InputStream is) throws IOException {
    int value = 0;
    int i = 0;
    int b;
    do {
      b = is.read();
      int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      ++i;
    } while (b > Byte.MAX_VALUE);
    return value;
  }
}
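/*
 * Illustrative sketch (not part of this patch): 300 = 0b10_0101100, so the low 7 bits go into the
 * first byte with the continuation bit set, and the remaining bits into the final byte.
 */
class UVIntToolSketch {
  public static void main(String[] args) {
    byte[] vint = UVIntTool.getBytes(300);// {(byte) 0xAC, 0x02}
    int roundTripped = UVIntTool.getInt(vint);// 300
    System.out.println(UVIntTool.numBytes(300) + " " + roundTripped);// prints "2 300"
  }
}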
@@ -0,0 +1,113 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.util.vint;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.classification.InterfaceAudience;

/**
 * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left
 * bit of the current byte is 1, then there is at least one more byte.
 */
@InterfaceAudience.Private
public class UVLongTool {

  public static final byte
    BYTE_7_RIGHT_BITS_SET = 127,
    BYTE_LEFT_BIT_SET = -128;

  public static final long
    LONG_7_RIGHT_BITS_SET = 127,
    LONG_8TH_BIT_SET = 128;

  public static final byte[]
    MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 };


  /********************* long -> bytes **************************/

  public static int numBytes(long in) {// do a check for illegal arguments if not protected
    if (in == 0) {
      return 1;
    }// doesn't work with the formula below
    return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1)
  }

  public static byte[] getBytes(long value) {
    int numBytes = numBytes(value);
    byte[] bytes = new byte[numBytes];
    long remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit
      remainder >>= 7;
    }
    bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit
    return bytes;
  }

  public static int writeBytes(long value, OutputStream os) throws IOException {
    int numBytes = numBytes(value);
    long remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET));
      remainder >>= 7;
    }
    // do not set the left bit
    os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET));
    return numBytes;
  }

  /******************** bytes -> long **************************/

  public static long getLong(byte[] bytes) {
    return getLong(bytes, 0);
  }

  public static long getLong(byte[] bytes, int offset) {
    long value = 0;
    for (int i = 0;; ++i) {
      byte b = bytes[offset + i];
      long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      if (b >= 0) {
        break;
      }// first bit was 0, so that's the last byte in the VarLong
    }
    return value;
  }

  public static long getLong(InputStream is) throws IOException {
    long value = 0;
    int i = 0;
    int b;
    do {
      b = is.read();
      long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      ++i;
    } while (b > Byte.MAX_VALUE);
    return value;
  }
}
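/*
 * Illustrative sketch (not part of this patch): the same 7-bit scheme as UVIntTool, sized for the
 * long timestamps and mvcc versions that the prefix-tree block meta stores. The sample value is
 * the timestamp used in TestBlockMeta below; the width of 6 bytes is the editor's expectation.
 */
class UVLongToolSketch {
  public static void main(String[] args) {
    byte[] encoded = UVLongTool.getBytes(1318966363481L);// 6 bytes for this timestamp
    long decoded = UVLongTool.getLong(encoded);// 1318966363481
    System.out.println(UVLongTool.numBytes(1318966363481L) + " " + decoded);
  }
}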
@@ -0,0 +1,55 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.keyvalue;

import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hbase.codec.prefixtree.row.TestRowData;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@RunWith(Parameterized.class)
public class TestKeyValueTool {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestRowData.InMemory().getAllAsObjectArray();
  }

  private TestRowData rows;

  public TestKeyValueTool(TestRowData testRows) {
    this.rows = testRows;
  }

  @Test
  public void testRoundTripToBytes() {
    List<KeyValue> kvs = rows.getInputs();
    ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false);
    List<KeyValue> roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false);
    Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray());
  }
}
@@ -0,0 +1,27 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree;

import org.apache.hadoop.hbase.util.Bytes;

public class PrefixTreeTestConstants {

  public static final byte[] TEST_CF = Bytes.toBytes("cfDefault");

}
@@ -0,0 +1,82 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.blockmeta;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.junit.Assert;
import org.junit.Test;

public class TestBlockMeta {

  static int BLOCK_START = 123;

  private static PrefixTreeBlockMeta createSample() {
    PrefixTreeBlockMeta m = new PrefixTreeBlockMeta();
    m.setNumMetaBytes(0);
    m.setNumKeyValueBytes(3195);

    m.setNumRowBytes(0);
    m.setNumFamilyBytes(3);
    m.setNumQualifierBytes(12345);
    m.setNumTimestampBytes(23456);
    m.setNumMvccVersionBytes(5);
    m.setNumValueBytes(34567);

    m.setNextNodeOffsetWidth(3);
    m.setFamilyOffsetWidth(1);
    m.setQualifierOffsetWidth(2);
    m.setTimestampIndexWidth(1);
    m.setMvccVersionIndexWidth(2);
    m.setValueOffsetWidth(8);
    m.setValueLengthWidth(3);

    m.setRowTreeDepth(11);
    m.setMaxRowLength(200);
    m.setMaxQualifierLength(50);

    m.setMinTimestamp(1318966363481L);
    m.setTimestampDeltaWidth(3);
    m.setMinMvccVersion(100L);
    m.setMvccVersionDeltaWidth(4);

    m.setAllSameType(false);
    m.setAllTypes(KeyValue.Type.Delete.getCode());

    m.setNumUniqueRows(88);
    m.setNumUniqueFamilies(1);
    m.setNumUniqueQualifiers(56);
    return m;
  }

  @Test
  public void testStreamSerialization() throws IOException {
    PrefixTreeBlockMeta original = createSample();
    ByteArrayOutputStream os = new ByteArrayOutputStream(10000);
    original.writeVariableBytesToOutputStream(os);
    ByteBuffer buffer = ByteBuffer.wrap(os.toByteArray());
    PrefixTreeBlockMeta roundTripped = new PrefixTreeBlockMeta(buffer);
    Assert.assertTrue(original.equals(roundTripped));
  }

}
@@ -0,0 +1,74 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.builder;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@RunWith(Parameterized.class)
public class TestTokenizer {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestTokenizerData.InMemory().getAllAsObjectArray();
  }

  private List<byte[]> inputs;
  private Tokenizer builder;
  private List<byte[]> roundTripped;

  public TestTokenizer(TestTokenizerData sortedByteArrays) {
    this.inputs = sortedByteArrays.getInputs();
    this.builder = new Tokenizer();
    for (byte[] array : inputs) {
      builder.addSorted(new ByteRange(array));
    }
    this.roundTripped = builder.getArrays();
  }

  @Test
  public void testReaderRoundTrip() {
    Assert.assertEquals(inputs.size(), roundTripped.size());
    Assert.assertTrue(Bytes.isSorted(roundTripped));
    Assert.assertTrue(Bytes.equals(inputs, roundTripped));
  }

  @Test
  public void testSearching() {
    for (byte[] input : inputs) {
      TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult();
      builder.getNode(resultHolder, input, 0, input.length);
      TokenizerNode n = resultHolder.getMatchingNode();
      byte[] output = n.getNewByteArray();
      Assert.assertTrue(Bytes.equals(input, output));
    }
  }

}
@@ -0,0 +1,42 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.codec.prefixtree.builder;

import java.util.Collection;
import java.util.List;

import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataBasic;
import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataEdgeCase;

import com.google.common.collect.Lists;

public interface TestTokenizerData {

  List<byte[]> getInputs();
  List<byte[]> getOutputs();

  public static class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      all.add(new Object[] { new TestTokenizerDataBasic() });
      all.add(new Object[] { new TestTokenizerDataEdgeCase() });
      return all;
    }
  }
}
@ -0,0 +1,87 @@
|
|||
package org.apache.hbase.codec.prefixtree.builder;

import java.util.List;

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.junit.Assert;
import org.junit.Test;
import org.mortbay.log.Log;

import com.google.common.collect.Lists;

public class TestTreeDepth {

  @Test
  public void testSingleNode() {
    List<String> inputs = Lists.newArrayList("a");
    testInternal(inputs, 1);
  }

  @Test
  public void testSimpleBranch() {
    List<String> inputs = Lists.newArrayList("a", "aa", "ab");
    testInternal(inputs, 2);
  }

  @Test
  public void testEmptyRoot() {
    List<String> inputs = Lists.newArrayList("a", "b");
    testInternal(inputs, 2);
  }

  @Test
  public void testRootAsNub() {
    List<String> inputs = Lists.newArrayList("a", "aa");
    testInternal(inputs, 2);
  }

  @Test
  public void testRootAsNubPlusNub() {
    List<String> inputs = Lists.newArrayList("a", "aa", "aaa");
    testInternal(inputs, 3);
  }

  @Test
  public void testEmptyRootPlusNub() {
    List<String> inputs = Lists.newArrayList("a", "aa", "b");
    testInternal(inputs, 3);
  }

  @Test
  public void testSplitDistantAncestor() {
    List<String> inputs = Lists.newArrayList("a", "ac", "acd", "b");
    testInternal(inputs, 4);
  }

  protected void testInternal(List<String> inputs, int expectedTreeDepth) {
    Log.warn("init logger");
    Tokenizer builder = new Tokenizer();
    for (String s : inputs) {
      ByteRange b = new ByteRange(Bytes.toBytes(s));
      builder.addSorted(b);
    }
    Assert.assertEquals(1, builder.getRoot().getNodeDepth());
    Assert.assertEquals(expectedTreeDepth, builder.getTreeDepth());
  }

}

@@ -0,0 +1,51 @@
package org.apache.hbase.codec.prefixtree.builder.data;

import java.util.List;

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData;

import com.google.common.collect.Lists;

public class TestTokenizerDataBasic implements TestTokenizerData {

  static List<byte[]> d = Lists.newArrayList();
  static {
    List<String> s = Lists.newArrayList();
    s.add("abc");// nub
    s.add("abcde");// leaf
    s.add("bbc");// causes root to split and have empty token
    s.add("bbc");// makes numOccurrences=2 on the bbc node
    s.add("cd");// just to get another node after the numOccurrences=2
    d = Bytes.getUtf8ByteArrays(s);
  }

  @Override
  public List<byte[]> getInputs() {
    return d;
  }

  @Override
  public List<byte[]> getOutputs() {
    return d;
  }

}

@@ -0,0 +1,53 @@
package org.apache.hbase.codec.prefixtree.builder.data;

import java.util.List;

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData;

import com.google.common.collect.Lists;

public class TestTokenizerDataEdgeCase implements TestTokenizerData {

  static List<byte[]> d = Lists.newArrayList();
  static {
    /*
     * tricky little combination because the acegi token will partially match abdfi, but when you
     * descend into abdfi, it will not fully match
     */
    List<String> s = Lists.newArrayList();
    s.add("abdfh");
    s.add("abdfi");
    s.add("acegi");
    d = Bytes.getUtf8ByteArrays(s);
  }

  @Override
  public List<byte[]> getInputs() {
    return d;
  }

  @Override
  public List<byte[]> getOutputs() {
    return d;
  }

}

@@ -0,0 +1,120 @@
package org.apache.hbase.codec.prefixtree.column;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.google.common.collect.Lists;

@RunWith(Parameterized.class)
public class TestColumnBuilder {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestColumnData.InMemory().getAllAsObjectArray();
  }

  /*********** fields **********************************/

  protected TestColumnData columns;
  protected ByteRangeTreeSet columnSorter;
  protected List<ByteRange> sortedUniqueColumns;
  protected PrefixTreeBlockMeta blockMeta;
  protected Tokenizer builder;
  protected ColumnSectionWriter writer;
  protected byte[] bytes;
  protected byte[] buffer;
  protected ColumnReader reader;

  /*************** construct ****************************/

  public TestColumnBuilder(TestColumnData columns) {
    this.columns = columns;
    List<ByteRange> inputs = columns.getInputs();
    this.columnSorter = new ByteRangeTreeSet(inputs);
    this.sortedUniqueColumns = columnSorter.compile().getSortedRanges();
    List<byte[]> copies = ByteRangeTool.copyToNewArrays(sortedUniqueColumns);
    Assert.assertTrue(Bytes.isSorted(copies));
    this.blockMeta = new PrefixTreeBlockMeta();
    this.blockMeta.setNumMetaBytes(0);
    this.blockMeta.setNumRowBytes(0);
    this.builder = new Tokenizer();
  }

  /************* methods ********************************/

  @Test
  public void testReaderRoundTrip() throws IOException {
    for (int i = 0; i < sortedUniqueColumns.size(); ++i) {
      ByteRange column = sortedUniqueColumns.get(i);
      builder.addSorted(column);
    }
    List<byte[]> builderOutputArrays = builder.getArrays();
    for (int i = 0; i < builderOutputArrays.size(); ++i) {
      byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray();
      byte[] outputArray = builderOutputArrays.get(i);
      boolean same = Bytes.equals(inputArray, outputArray);
      Assert.assertTrue(same);
    }
    Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size());

    writer = new ColumnSectionWriter(blockMeta, builder, false);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    writer.compile().writeBytes(baos);
    bytes = baos.toByteArray();
    buffer = new byte[blockMeta.getMaxQualifierLength()];
    reader = new ColumnReader(buffer, false);
    reader.initOnBlock(blockMeta, bytes);

    List<TokenizerNode> builderNodes = Lists.newArrayList();
    builder.appendNodes(builderNodes, true, true);
    int i = 0;
    for (TokenizerNode builderNode : builderNodes) {
      if (!builderNode.hasOccurrences()) {
        continue;
      }
      Assert.assertEquals(1, builderNode.getNumOccurrences());// we de-duped before adding to builder
      int position = builderNode.getOutputArrayOffset();
      byte[] output = reader.populateBuffer(position).copyBufferToNewArray();
      boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output);
      Assert.assertTrue(same);
      ++i;
    }
  }

}

@@ -0,0 +1,45 @@
package org.apache.hbase.codec.prefixtree.column;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataRandom;
import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataSimple;

import com.google.common.collect.Lists;

public interface TestColumnData {

  List<ByteRange> getInputs();
  List<ByteRange> getOutputs();

  public static class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      all.add(new Object[] { new TestColumnDataSimple() });
      for (int leftShift = 0; leftShift < 16; ++leftShift) {
        all.add(new Object[] { new TestColumnDataRandom(1 << leftShift) });
      }
      return all;
    }
  }
}

@@ -0,0 +1,61 @@
package org.apache.hbase.codec.prefixtree.column.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.apache.hbase.codec.prefixtree.column.TestColumnData;
import org.apache.hbase.util.byterange.ByteRangeSet;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;

import com.google.common.collect.Lists;

public class TestColumnDataRandom implements TestColumnData {

  private List<ByteRange> inputs = Lists.newArrayList();
  private List<ByteRange> outputs = Lists.newArrayList();

  public TestColumnDataRandom(int numColumns) {
    RedundantKVGenerator generator = new RedundantKVGenerator();
    ByteRangeSet sortedColumns = new ByteRangeTreeSet();
    List<KeyValue> d = generator.generateTestKeyValues(numColumns);
    for (KeyValue col : d) {
      ByteRange colRange = new ByteRange(col.getQualifier());
      inputs.add(colRange);
      sortedColumns.add(colRange);
    }
    for (ByteRange col : sortedColumns.compile().getSortedRanges()) {
      outputs.add(col);
    }
  }

  @Override
  public List<ByteRange> getInputs() {
    return inputs;
  }

  @Override
  public List<ByteRange> getOutputs() {
    return outputs;
  }

}

@@ -0,0 +1,52 @@
package org.apache.hbase.codec.prefixtree.column.data;

import java.util.List;

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.column.TestColumnData;

import com.google.common.collect.Lists;

public class TestColumnDataSimple implements TestColumnData {

  @Override
  public List<ByteRange> getInputs() {
    List<String> d = Lists.newArrayList();
    d.add("abc");
    d.add("abcde");
    d.add("abc");
    d.add("bbc");
    d.add("abc");
    return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d));
  }

  @Override
  public List<ByteRange> getOutputs() {
    List<String> d = Lists.newArrayList();
    d.add("abc");
    d.add("abcde");
    d.add("bbc");
    return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d));
  }

}

@@ -0,0 +1,54 @@
package org.apache.hbase.codec.prefixtree.row;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;

import com.google.common.collect.Lists;

public abstract class BaseTestRowData implements TestRowData {

  @Override
  public List<Integer> getRowStartIndexes() {
    List<Integer> rowStartIndexes = Lists.newArrayList();
    rowStartIndexes.add(0);
    List<KeyValue> inputs = getInputs();
    for (int i = 1; i < inputs.size(); ++i) {
      KeyValue lastKv = inputs.get(i - 1);
      KeyValue kv = inputs.get(i);
      if (!CellComparator.equalsRow(lastKv, kv)) {
        rowStartIndexes.add(i);
      }
    }
    return rowStartIndexes;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
  }
}

@@ -0,0 +1,191 @@
package org.apache.hbase.codec.prefixtree.row;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@RunWith(Parameterized.class)
public class TestPrefixTreeSearcher {

  protected static int BLOCK_START = 7;

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestRowData.InMemory().getAllAsObjectArray();
  }

  protected TestRowData rows;
  protected ByteBuffer block;

  public TestPrefixTreeSearcher(TestRowData testRows) throws IOException {
    this.rows = testRows;
    ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
    PrefixTreeEncoder kvBuilder = new PrefixTreeEncoder(os, true);
    for (KeyValue kv : rows.getInputs()) {
      kvBuilder.write(kv);
    }
    kvBuilder.flush();
    byte[] outputBytes = os.toByteArray();
    this.block = ByteBuffer.wrap(outputBytes);
  }

  @Test
  public void testScanForwards() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);

      int i = -1;
      while (searcher.next()) {
        ++i;
        KeyValue inputCell = rows.getInputs().get(i);
        Cell outputCell = searcher.getCurrent();

        // check all 3 permutations of equals()
        Assert.assertEquals(inputCell, outputCell);
        Assert.assertEquals(outputCell, inputCell);
        Assert.assertTrue(CellComparator.equals(inputCell, outputCell));
      }
      Assert.assertEquals(rows.getInputs().size(), i + 1);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testScanBackwards() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      searcher.positionAfterLastCell();
      int i = -1;
      while (searcher.previous()) {
        ++i;
        int oppositeIndex = rows.getInputs().size() - i - 1;
        KeyValue inputKv = rows.getInputs().get(oppositeIndex);
        KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
        Assert.assertEquals(inputKv, outputKv);
      }
      Assert.assertEquals(rows.getInputs().size(), i + 1);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testRandomSeekHits() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      for (KeyValue kv : rows.getInputs()) {
        boolean hit = searcher.positionAt(kv);
        Assert.assertTrue(hit);
        Cell foundKv = searcher.getCurrent();
        Assert.assertTrue(CellComparator.equals(kv, foundKv));
      }
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  /**
   * very hard to test nubs with this thing since a nextRowKey function will usually skip them
   */
  @Test
  public void testRandomSeekMisses() throws IOException {
    CellSearcher searcher = null;
    List<Integer> rowStartIndexes = rows.getRowStartIndexes();
    try {
      searcher = DecoderFactory.checkOut(block, true);
      for (int i = 0; i < rows.getInputs().size(); ++i) {
        KeyValue kv = rows.getInputs().get(i);

        // nextRow
        KeyValue inputNextRow = KeyValueTool.createFirstKeyInNextRow(kv);

        CellScannerPosition position = searcher.positionAtOrBefore(inputNextRow);
        boolean isFirstInRow = rowStartIndexes.contains(i);
        if (isFirstInRow) {
          int rowIndex = rowStartIndexes.indexOf(i);
          if (rowIndex < rowStartIndexes.size() - 1) {
            // int lastKvInRowI = rowStartIndexes.get(rowIndex + 1) - 1;
            Assert.assertEquals(CellScannerPosition.BEFORE, position);
            /*
             * Can't get this to work between nubs like rowB\x00 <-> rowBB
             *
             * No reason to doubt that it works, but will have to come up with a smarter test.
             */
            // Assert.assertEquals(rows.getInputs().get(lastKvInRowI), searcher.getCurrentCell());
          }
        }

        // previous KV
        KeyValue inputPreviousKv = KeyValueTool.previousKey(kv);
        boolean hit = searcher.positionAt(inputPreviousKv);
        Assert.assertFalse(hit);
        position = searcher.positionAtOrAfter(inputPreviousKv);
        if (CollectionUtils.isLastIndex(rows.getInputs(), i)) {
          Assert.assertTrue(CellScannerPosition.AFTER_LAST == position);
        } else {
          Assert.assertTrue(CellScannerPosition.AFTER == position);
          /*
           * TODO: why i+1 instead of i?
           */
          Assert.assertEquals(rows.getInputs().get(i + 1), searcher.getCurrent());
        }
      }
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testRandomSeekIndividualAssertions() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      rows.individualSearcherAssertions(searcher);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }
}

@@ -0,0 +1,97 @@
package org.apache.hbase.codec.prefixtree.row;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataComplexQualifiers;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDeeper;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDifferentTimestamps;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataEmpty;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataExerciseFInts;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNub;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSimple;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataTrivial;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrls;
import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;

import com.google.common.collect.Lists;

/*
 * A master class for registering different implementations of TestRowData.
 */
public interface TestRowData {

  List<KeyValue> getInputs();
  List<Integer> getRowStartIndexes();

  void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta);

  void individualSearcherAssertions(CellSearcher searcher);

  public static class InMemory {

    /*
     * The following are different styles of data that the codec may encounter. Having these small
     * representations of the data helps pinpoint what is wrong if the encoder breaks.
     */
    public static Collection<TestRowData> getAll() {
      List<TestRowData> all = Lists.newArrayList();
      //simple
      all.add(new TestRowDataEmpty());
      all.add(new TestRowDataTrivial());
      all.add(new TestRowDataSimple());
      all.add(new TestRowDataDeeper());

      //more specific
      all.add(new TestRowDataSingleQualifier());
      // all.add(new TestRowDataMultiFamilies());//multiple families disabled in PrefixTreeEncoder
      all.add(new TestRowDataNub());
      all.add(new TestRowDataSearcherRowMiss());
      all.add(new TestRowDataQualifierByteOrdering());
      all.add(new TestRowDataComplexQualifiers());
      all.add(new TestRowDataDifferentTimestamps());

      //larger data volumes (hard to debug)
      all.add(new TestRowDataNumberStrings());
      all.add(new TestRowDataUrls());
      all.add(new TestRowDataUrlsExample());
      all.add(new TestRowDataExerciseFInts());
      all.add(new TestRowDataRandomKeyValues());
      return all;
    }

    public static Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      for (TestRowData testRows : getAll()) {
        all.add(new Object[] { testRows });
      }
      return all;
    }
  }
}

@@ -0,0 +1,186 @@
package org.apache.hbase.codec.prefixtree.row;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.google.common.collect.Lists;

@RunWith(Parameterized.class)
public class TestRowEncoder {

  protected static int BLOCK_START = 7;

  @Parameters
  public static Collection<Object[]> parameters() {
    List<Object[]> parameters = Lists.newArrayList();
    for (TestRowData testRows : TestRowData.InMemory.getAll()) {
      parameters.add(new Object[] { testRows });
    }
    return parameters;
  }

  protected TestRowData rows;
  protected List<KeyValue> inputKvs;
  protected boolean includeMemstoreTS = true;
  protected ByteArrayOutputStream os;
  protected PrefixTreeEncoder encoder;
  protected int totalBytes;
  protected PrefixTreeBlockMeta blockMetaWriter;
  protected byte[] outputBytes;
  protected ByteBuffer buffer;
  protected ByteArrayInputStream is;
  protected PrefixTreeBlockMeta blockMetaReader;
  protected byte[] inputBytes;
  protected PrefixTreeArraySearcher searcher;

  public TestRowEncoder(TestRowData testRows) {
    this.rows = testRows;
  }

  @Before
  public void compile() throws IOException {
    os = new ByteArrayOutputStream(1 << 20);
    encoder = new PrefixTreeEncoder(os, includeMemstoreTS);

    inputKvs = rows.getInputs();
    for (KeyValue kv : inputKvs) {
      encoder.write(kv);
    }
    encoder.flush();
    totalBytes = encoder.getTotalBytes();
    blockMetaWriter = encoder.getBlockMeta();
    outputBytes = os.toByteArray();

    // start reading, but save the assertions for @Test methods
    buffer = ByteBuffer.wrap(outputBytes);
    blockMetaReader = new PrefixTreeBlockMeta(buffer);

    searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(),
        blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength());
    searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS);
  }

  @Test
  public void testEncoderOutput() throws IOException {
    Assert.assertEquals(totalBytes, outputBytes.length);
    Assert.assertEquals(blockMetaWriter, blockMetaReader);
  }

  @Test
  public void testForwardScanner() {
    int counter = -1;
    while (searcher.next()) {
      ++counter;
      KeyValue inputKv = rows.getInputs().get(counter);
      KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    // assert same number of cells
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  /**
   * probably not needed since testReverseScannerWithJitter() below is more thorough
   */
  @Test
  public void testReverseScanner() {
    searcher.positionAfterLastCell();
    int counter = -1;
    while (searcher.previous()) {
      ++counter;
      int oppositeIndex = rows.getInputs().size() - counter - 1;
      KeyValue inputKv = rows.getInputs().get(oppositeIndex);
      KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  /**
   * Exercise the nubCellsRemain variable by calling next+previous. NubCellsRemain is basically
   * a special fan index.
   */
  @Test
  public void testReverseScannerWithJitter() {
    searcher.positionAfterLastCell();
    int counter = -1;
    while (true) {
      boolean foundCell = searcher.previous();
      if (!foundCell) {
        break;
      }
      ++counter;

      // a next+previous should cancel out
      if (!searcher.isAfterLast()) {
        searcher.next();
        searcher.previous();
      }

      int oppositeIndex = rows.getInputs().size() - counter - 1;
      KeyValue inputKv = rows.getInputs().get(oppositeIndex);
      KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  @Test
  public void testIndividualBlockMetaAssertions() {
    rows.individualBlockMetaAssertions(blockMetaReader);
  }

  /**************** helper **************************/

  protected void assertKeyAndValueEqual(Cell expected, Cell actual) {
    // assert keys are equal (doesn't compare values)
    Assert.assertEquals(expected, actual);
    if (includeMemstoreTS) {
      Assert.assertEquals(expected.getMvccVersion(), actual.getMvccVersion());
    }
    // assert values equal
    Assert.assertTrue(Bytes.equals(expected.getValueArray(), expected.getValueOffset(),
        expected.getValueLength(), actual.getValueArray(), actual.getValueOffset(),
        actual.getValueLength()));
  }

}

@@ -0,0 +1,67 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataComplexQualifiers extends BaseTestRowData {

  static byte[]
      Arow = Bytes.toBytes("Arow"),
      cf = PrefixTreeTestConstants.TEST_CF,
      v0 = Bytes.toBytes("v0");

  static List<byte[]> qualifiers = Lists.newArrayList();
  static {
    List<String> qualifierStrings = Lists.newArrayList();
    qualifierStrings.add("cq");
    qualifierStrings.add("cq0");
    qualifierStrings.add("cq1");
    qualifierStrings.add("cq2");
    qualifierStrings.add("dq0");// second root level fan
    qualifierStrings.add("dq1");// nub
    qualifierStrings.add("dq111");// leaf on nub
    qualifierStrings.add("dq11111a");// leaf on leaf
    for (String s : qualifierStrings) {
      qualifiers.add(Bytes.toBytes(s));
    }
  }

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    for (byte[] qualifier : qualifiers) {
      d.add(new KeyValue(Arow, cf, qualifier, ts, v0));
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -0,0 +1,84 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;

import com.google.common.collect.Lists;

/*
 * Goes beyond a trivial trie to add a branch on the "cf" node
 */
public class TestRowDataDeeper extends BaseTestRowData {

  static byte[]
      cdc = Bytes.toBytes("cdc"),
      cf6 = Bytes.toBytes("cf6"),
      cfc = Bytes.toBytes("cfc"),
      f = Bytes.toBytes("f"),
      q = Bytes.toBytes("q"),
      v = Bytes.toBytes("v");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(cdc, f, q, ts, v));
    d.add(new KeyValue(cf6, f, q, ts, v));
    d.add(new KeyValue(cfc, f, q, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    //0: token:c; fan:d,f
    //1: token:f; fan:6,c
    //2: leaves
    Assert.assertEquals(3, blockMeta.getRowTreeDepth());
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    /**
     * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only
     * rA, but rB as well.
     */
    KeyValue cfcRow = KeyValue.createFirstOnRow(Bytes.toBytes("cfc"));
    CellScannerPosition position = searcher.positionAtOrAfter(cfcRow);
    Assert.assertEquals(CellScannerPosition.AFTER, position);
    Assert.assertEquals(d.get(2), searcher.getCurrent());
    searcher.previous();
    Assert.assertEquals(d.get(1), searcher.getCurrent());
  }
}

@@ -0,0 +1,94 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.junit.Assert;

import com.google.common.collect.Lists;

/*
 * test different timestamps
 */
public class TestRowDataDifferentTimestamps extends BaseTestRowData {

  static byte[]
      Arow = Bytes.toBytes("Arow"),
      Brow = Bytes.toBytes("Brow"),
      cf = Bytes.toBytes("fammy"),
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1"),
      v0 = Bytes.toBytes("v0");

  static List<KeyValue> d = Lists.newArrayList();
  static {
    KeyValue kv0 = new KeyValue(Arow, cf, cq0, 0L, v0);
    kv0.setMvccVersion(123456789L);
    d.add(kv0);

    KeyValue kv1 = new KeyValue(Arow, cf, cq1, 1L, v0);
    kv1.setMvccVersion(3L);
    d.add(kv1);

    KeyValue kv2 = new KeyValue(Brow, cf, cq0, 12345678L, v0);
    kv2.setMvccVersion(65537L);
    d.add(kv2);

    //watch out... Long.MAX_VALUE comes back as 1332221664203, even with other encoders
    // d.add(new KeyValue(Brow, cf, cq1, Long.MAX_VALUE, v0));
    KeyValue kv3 = new KeyValue(Brow, cf, cq1, Long.MAX_VALUE-1, v0);
    kv3.setMvccVersion(1L);
    d.add(kv3);

    KeyValue kv4 = new KeyValue(Brow, cf, cq1, 999999999, v0);
    //don't set memstoreTS
    d.add(kv4);

    KeyValue kv5 = new KeyValue(Brow, cf, cq1, 12345, v0);
    kv5.setMvccVersion(0L);
    d.add(kv5);
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    Assert.assertTrue(blockMeta.getNumMvccVersionBytes() > 0);
    Assert.assertEquals(12, blockMeta.getNumValueBytes());

    Assert.assertFalse(blockMeta.isAllSameTimestamp());
    Assert.assertNotNull(blockMeta.getMinTimestamp());
    Assert.assertTrue(blockMeta.getTimestampIndexWidth() > 0);
    Assert.assertTrue(blockMeta.getTimestampDeltaWidth() > 0);

    Assert.assertFalse(blockMeta.isAllSameMvccVersion());
    Assert.assertNotNull(blockMeta.getMinMvccVersion());
    Assert.assertTrue(blockMeta.getMvccVersionIndexWidth() > 0);
    Assert.assertTrue(blockMeta.getMvccVersionDeltaWidth() > 0);
  }

}

@@ -0,0 +1,43 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataEmpty extends BaseTestRowData {

  private static byte[] b = new byte[0];

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(b, b, b, 0L, Type.Put, b));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -0,0 +1,114 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;

import com.google.common.collect.Lists;

/*
 * test different timestamps
 *
 * http://pastebin.com/7ks8kzJ2
 * http://pastebin.com/MPn03nsK
 */
public class TestRowDataExerciseFInts extends BaseTestRowData {

  static List<ByteRange> rows;
  static {
    List<String> rowStrings = new ArrayList<String>();
    rowStrings.add("com.edsBlog/directoryAa/pageAaa");
    rowStrings.add("com.edsBlog/directoryAa/pageBbb");
    rowStrings.add("com.edsBlog/directoryAa/pageCcc");
    rowStrings.add("com.edsBlog/directoryAa/pageDdd");
    rowStrings.add("com.edsBlog/directoryBb/pageEee");
    rowStrings.add("com.edsBlog/directoryBb/pageFff");
    rowStrings.add("com.edsBlog/directoryBb/pageGgg");
    rowStrings.add("com.edsBlog/directoryBb/pageHhh");
    rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
    rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
    rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
    rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
    rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
    rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
    rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
    rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
    ByteRangeTreeSet ba = new ByteRangeTreeSet();
    for (String row : rowStrings) {
      ba.add(new ByteRange(Bytes.toBytes(row)));
    }
    rows = ba.compile().getSortedRanges();
  }

  static List<String> cols = Lists.newArrayList();
  static {
    cols.add("Chrome");
    cols.add("Chromeb");
    cols.add("Firefox");
    cols.add("InternetExplorer");
    cols.add("Opera");
    cols.add("Safari");
    cols.add("Z1stBrowserWithHuuuuuuuuuuuugeQualifier");
    cols.add("Z2ndBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z3rdBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z4thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z5thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z6thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z7thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z8thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z9thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
  }

  static long ts = 1234567890;

  static int MAX_VALUE = 50;

  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (ByteRange row : rows) {
      for (String col : cols) {
        KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
            Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
        kvs.add(kv);
      }
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    Assert.assertTrue(blockMeta.getNextNodeOffsetWidth() > 1);
    Assert.assertTrue(blockMeta.getQualifierOffsetWidth() > 1);
  }

}

@@ -0,0 +1,60 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataMultiFamilies extends BaseTestRowData {

  static byte[]
      rowA = Bytes.toBytes("rowA"),
      rowB = Bytes.toBytes("rowB"),
      famA = Bytes.toBytes("famA"),
      famB = Bytes.toBytes("famB"),
      famBB = Bytes.toBytes("famBB"),
      q0 = Bytes.toBytes("q0"),
      q1 = Bytes.toBytes("q1"),//start with a different character
      vvv = Bytes.toBytes("vvv");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, famA, q0, ts, vvv));
    d.add(new KeyValue(rowA, famB, q1, ts, vvv));
    d.add(new KeyValue(rowA, famBB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famA, q0, ts, vvv));
    d.add(new KeyValue(rowB, famA, q1, ts, vvv));
    d.add(new KeyValue(rowB, famB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famBB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famBB, q1, ts, vvv));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -0,0 +1,59 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataNub extends BaseTestRowData {

  static byte[]
      rowA = Bytes.toBytes("rowA"),
      rowB = Bytes.toBytes("rowB"), // nub
      rowBB = Bytes.toBytes("rowBB"),
      cf = PrefixTreeTestConstants.TEST_CF,
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, cf, cq0, ts, v0));
    d.add(new KeyValue(rowA, cf, cq1, ts, v0));
    d.add(new KeyValue(rowB, cf, cq0, ts, v0));
    d.add(new KeyValue(rowB, cf, cq1, ts, v0));
    d.add(new KeyValue(rowBB, cf, cq0, ts, v0));
    d.add(new KeyValue(rowBB, cf, cq1, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}
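Illustration (not part of the patch): in this data "rowB" is a strict prefix of "rowBB", so its node in the row trie both ends a row of its own and branches onward, which appears to be what these tests call a nub. A JDK-only check of the prefix relationship:

public class NubPrefixDemo {
  public static void main(String[] args) {
    String rowB = "rowB";
    String rowBB = "rowBB";
    // rowB terminates a row of its own and is also a prefix of rowBB,
    // so its trie node is simultaneously a leaf and a branch.
    System.out.println(rowBB.startsWith(rowB) && !rowBB.equals(rowB)); // true
  }
}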
@@ -0,0 +1,61 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataNumberStrings extends BaseTestRowData {

  static List<KeyValue> d = Lists.newArrayList();
  static {

    /**
     * Test a list of numbers encoded as strings. As strings, 0, 1, 10, 11 still sort as
     * 0, 1, 10, 11.
     * <p/>
     * This helped catch a bug with reverse scanning where it jumped from the last leaf cell to
     * the previous nub. It should go 11->10, but it was incorrectly going 11->1.
     */
    List<Integer> problematicSeries = Lists.newArrayList(0, 1, 10, 11);// sorted at the end
    for (Integer i : problematicSeries) {
      // for (int i = 0; i < 13; ++i) {
      byte[] row = Bytes.toBytes("" + i);
      byte[] family = Bytes.toBytes("F");
      byte[] column = Bytes.toBytes("C");
      byte[] value = Bytes.toBytes("V");

      d.add(new KeyValue(row, family, column, 0L, Type.Put, value));
    }
    Collections.sort(d, new CellComparator());
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}
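Illustration (not part of the patch): string-encoded 0, 1, 10, 11 happen to sort the same way lexicographically as they do numerically, and a reverse scan over the sorted rows must step 11 -> 10 -> 1 -> 0, never 11 -> 1. A JDK-only sketch of that ordering:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class NumberStringOrderingDemo {
  public static void main(String[] args) {
    List<String> rows = new ArrayList<String>();
    Collections.addAll(rows, "11", "1", "10", "0");
    Collections.sort(rows); // lexicographic, byte-wise order: [0, 1, 10, 11]
    System.out.println(rows);
    for (int i = rows.size() - 1; i > 0; --i) {
      // a reverse scan must visit each neighbor in turn
      System.out.println(rows.get(i) + " -> " + rows.get(i - 1));
    }
  }
}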
@@ -0,0 +1,58 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataQualifierByteOrdering extends BaseTestRowData {

  static byte[]
      Arow = Bytes.toBytes("Arow"),
      Brow = Bytes.toBytes("Brow"),
      Brow2 = Bytes.toBytes("Brow2"),
      fam = Bytes.toBytes("HappyFam"),
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1tail"), // make sure the tail does not come back reversed ("liat")
      cq2 = Bytes.toBytes("cq2"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(Arow, fam, cq0, ts, v0));
    d.add(new KeyValue(Arow, fam, cq1, ts, v0));
    d.add(new KeyValue(Brow, fam, cq0, ts, v0));
    d.add(new KeyValue(Brow, fam, cq2, ts, v0));
    d.add(new KeyValue(Brow2, fam, cq1, ts, v0));
    d.add(new KeyValue(Brow2, fam, cq2, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}
@@ -0,0 +1,42 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataRandomKeyValues extends BaseTestRowData {

  static List<KeyValue> d = Lists.newArrayList();
  static RedundantKVGenerator generator = new RedundantKVGenerator();
  static {
    d = generator.generateTestKeyValues(1 << 10);
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}
@@ -0,0 +1,123 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;

import com.google.common.collect.Lists;

public class TestRowDataSearcherRowMiss extends BaseTestRowData {

  static byte[]
      // don't let the rows share any common prefix bytes
      A = Bytes.toBytes("A"),
      AA = Bytes.toBytes("AA"),
      AAA = Bytes.toBytes("AAA"),
      B = Bytes.toBytes("B"),
      cf = Bytes.toBytes("fam"),
      cq = Bytes.toBytes("cq0"),
      v = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(A, cf, cq, ts, v));
    d.add(new KeyValue(AA, cf, cq, ts, v));
    d.add(new KeyValue(AAA, cf, cq, ts, v));
    d.add(new KeyValue(B, cf, cq, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    assertRowOffsetsCorrect();

    searcher.resetToBeforeFirstEntry();

    // test first cell
    searcher.next();
    Cell first = searcher.getCurrent();
    Assert.assertTrue(CellComparator.equals(d.get(0), first));

    // test first cell in second row
    Assert.assertTrue(searcher.positionAt(d.get(1)));
    Assert.assertTrue(CellComparator.equals(d.get(1), searcher.getCurrent()));

    testBetween1and2(searcher);
    testBetween2and3(searcher);
  }

  /************ private methods, call from above *******************/

  private void assertRowOffsetsCorrect() {
    Assert.assertEquals(4, getRowStartIndexes().size());
  }

  private void testBetween1and2(CellSearcher searcher) {
    CellScannerPosition p;// reuse
    Cell betweenAAndAAA = new KeyValue(AA, cf, cq, ts - 2, v);

    // test exact
    Assert.assertFalse(searcher.positionAt(betweenAAndAAA));

    // test atOrBefore
    p = searcher.positionAtOrBefore(betweenAAndAAA);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(1)));

    // test atOrAfter
    p = searcher.positionAtOrAfter(betweenAAndAAA);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(2)));
  }

  private void testBetween2and3(CellSearcher searcher) {
    CellScannerPosition p;// reuse
    Cell betweenAAAndB = new KeyValue(AAA, cf, cq, ts - 2, v);

    // test exact
    Assert.assertFalse(searcher.positionAt(betweenAAAndB));

    // test atOrBefore
    p = searcher.positionAtOrBefore(betweenAAAndB);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(2)));

    // test atOrAfter
    p = searcher.positionAtOrAfter(betweenAAAndB);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(3)));
  }

}
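Illustration (not part of the patch): the BEFORE/AFTER expectations above behave like floor/ceiling lookups on a sorted set. A rough analogy using java.util.TreeSet (the real CellSearcher positions on cells inside an encoded block; this only mirrors the intended semantics):

import java.util.TreeSet;

public class SearcherPositionAnalogy {
  public static void main(String[] args) {
    TreeSet<String> rows = new TreeSet<String>();
    rows.add("A");
    rows.add("AA");
    rows.add("AAA");
    rows.add("B");

    String miss = "AB"; // absent; sorts between "AAA" and "B"
    System.out.println(rows.contains(miss)); // false ~ positionAt() returns false
    System.out.println(rows.floor(miss));    // AAA   ~ positionAtOrBefore() -> BEFORE
    System.out.println(rows.ceiling(miss));  // B     ~ positionAtOrAfter()  -> AFTER
  }
}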
@@ -0,0 +1,112 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;

import com.google.common.collect.Lists;

public class TestRowDataSimple extends BaseTestRowData {

  static byte[]
      // don't let the rows share any common prefix bytes
      rowA = Bytes.toBytes("Arow"),
      rowB = Bytes.toBytes("Brow"),
      cf = Bytes.toBytes("fam"),
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1tail"), // make sure the tail does not come back reversed ("liat")
      cq2 = Bytes.toBytes("dcq2"), // start with a different character
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, cf, cq0, ts, v0));
    d.add(new KeyValue(rowA, cf, cq1, ts, v0));
    d.add(new KeyValue(rowA, cf, cq2, ts, v0));
    d.add(new KeyValue(rowB, cf, cq0, ts, v0));
    d.add(new KeyValue(rowB, cf, cq1, ts, v0));
    d.add(new KeyValue(rowB, cf, cq2, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    CellScannerPosition p;// reuse
    searcher.resetToBeforeFirstEntry();

    // test first cell
    searcher.next();
    Cell first = searcher.getCurrent();
    Assert.assertTrue(CellComparator.equals(d.get(0), first));

    // test first cell in second row
    Assert.assertTrue(searcher.positionAt(d.get(3)));
    Assert.assertTrue(CellComparator.equals(d.get(3), searcher.getCurrent()));

    Cell between4And5 = new KeyValue(rowB, cf, cq1, ts - 2, v0);

    // test exact
    Assert.assertFalse(searcher.positionAt(between4And5));

    // test atOrBefore
    p = searcher.positionAtOrBefore(between4And5);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(4)));

    // test atOrAfter
    p = searcher.positionAtOrAfter(between4And5);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(5)));

    // test when key falls before first key in block
    Cell beforeFirst = new KeyValue(Bytes.toBytes("A"), cf, cq0, ts, v0);
    Assert.assertFalse(searcher.positionAt(beforeFirst));
    p = searcher.positionAtOrBefore(beforeFirst);
    Assert.assertEquals(CellScannerPosition.BEFORE_FIRST, p);
    p = searcher.positionAtOrAfter(beforeFirst);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(0)));
    Assert.assertEquals(d.get(0), searcher.getCurrent());

    // test when key falls after last key in block
    Cell afterLast = new KeyValue(Bytes.toBytes("z"), cf, cq0, ts, v0);// must be lower-case z
    Assert.assertFalse(searcher.positionAt(afterLast));
    p = searcher.positionAtOrAfter(afterLast);
    Assert.assertEquals(CellScannerPosition.AFTER_LAST, p);
    p = searcher.positionAtOrBefore(afterLast);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), CollectionUtils.getLast(d)));
  }

}
@@ -0,0 +1,52 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

public class TestRowDataSingleQualifier extends BaseTestRowData {

  static byte[]
      rowA = Bytes.toBytes("rowA"),
      rowB = Bytes.toBytes("rowB"),
      cf = PrefixTreeTestConstants.TEST_CF,
      cq0 = Bytes.toBytes("cq0"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, cf, cq0, ts, v0));
    d.add(new KeyValue(rowB, cf, cq0, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}
@@ -0,0 +1,73 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.cell.CellScannerPosition;
import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
import org.junit.Assert;

import com.google.common.collect.Lists;

public class TestRowDataTrivial extends BaseTestRowData {

  static byte[]
      rA = Bytes.toBytes("rA"),
      rB = Bytes.toBytes("rB"), // turn "r" into a branch for the Searcher tests
      cf = Bytes.toBytes("fam"),
      cq0 = Bytes.toBytes("q0"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rA, cf, cq0, ts, v0));
    d.add(new KeyValue(rB, cf, cq0, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    // node[0] -> root[r]
    // node[1] -> leaf[A], etc
    Assert.assertEquals(2, blockMeta.getRowTreeDepth());
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    /**
     * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only
     * rA, but rB as well.
     */
    KeyValue afterLast = KeyValue.createFirstOnRow(Bytes.toBytes("zzz"));
    CellScannerPosition position = searcher.positionAtOrAfter(afterLast);
    Assert.assertEquals(CellScannerPosition.AFTER_LAST, position);
    Assert.assertNull(searcher.getCurrent());
  }
}
@@ -0,0 +1,98 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;

import com.google.common.collect.Lists;

/*
 * test different timestamps
 *
 * http://pastebin.com/7ks8kzJ2
 * http://pastebin.com/MPn03nsK
 */
public class TestRowDataUrls extends BaseTestRowData {

  static List<ByteRange> rows;
  static {
    List<String> rowStrings = new ArrayList<String>();
    rowStrings.add("com.edsBlog/directoryAa/pageAaa");
    rowStrings.add("com.edsBlog/directoryAa/pageBbb");
    rowStrings.add("com.edsBlog/directoryAa/pageCcc");
    rowStrings.add("com.edsBlog/directoryAa/pageDdd");
    rowStrings.add("com.edsBlog/directoryBb/pageEee");
    rowStrings.add("com.edsBlog/directoryBb/pageFff");
    rowStrings.add("com.edsBlog/directoryBb/pageGgg");
    rowStrings.add("com.edsBlog/directoryBb/pageHhh");
    rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
    rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
    rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
    rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
    rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
    rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
    rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
    rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
    ByteRangeTreeSet ba = new ByteRangeTreeSet();
    for (String row : rowStrings) {
      ba.add(new ByteRange(Bytes.toBytes(row)));
    }
    rows = ba.compile().getSortedRanges();
  }

  static List<String> cols = Lists.newArrayList();
  static {
    cols.add("Chrome");
    cols.add("Chromeb");
    cols.add("Firefox");
    cols.add("InternetExplorer");
    cols.add("Opera");
    cols.add("Safari");
  }

  static long ts = 1234567890;

  static int MAX_VALUE = 50;

  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (ByteRange row : rows) {
      for (String col : cols) {
        KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
            Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
        kvs.add(kv);
        // System.out.println("TestRows5:"+kv);
      }
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }

}
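Illustration (not part of the patch): adjacent sorted URL rows above share long prefixes, which is what a prefix-trie block exploits -- each row only needs to store the bytes that follow the prefix it shares with its neighbor. A JDK-only measurement:

import java.util.Arrays;
import java.util.List;

public class UrlPrefixSharingDemo {
  static int commonPrefixLength(String a, String b) {
    int n = Math.min(a.length(), b.length());
    int i = 0;
    while (i < n && a.charAt(i) == b.charAt(i)) {
      ++i;
    }
    return i;
  }

  public static void main(String[] args) {
    List<String> rows = Arrays.asList(
        "com.edsBlog/directoryAa/pageAaa",
        "com.edsBlog/directoryAa/pageBbb",
        "com.edsBlog/directoryBb/pageEee",
        "com.isabellasBlog/directoryAa/pageAaa");
    for (int i = 1; i < rows.size(); ++i) {
      System.out.println(commonPrefixLength(rows.get(i - 1), rows.get(i))
          + " bytes shared with the previous row before \"" + rows.get(i) + "\"");
    }
  }
}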
@@ -0,0 +1,126 @@
package org.apache.hbase.codec.prefixtree.row.data;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hbase.codec.prefixtree.encode.column.ColumnNodeWriter;
import org.apache.hbase.codec.prefixtree.encode.row.RowNodeWriter;
import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hbase.codec.prefixtree.row.BaseTestRowData;

import com.google.common.collect.Lists;

/*
 * test different timestamps
 *
 * http://pastebin.com/7ks8kzJ2
 * http://pastebin.com/MPn03nsK
 */
public class TestRowDataUrlsExample extends BaseTestRowData {

  static String TENANT_ID = Integer.toString(95322);
  static String APP_ID = Integer.toString(12);
  static List<String> URLS = Lists.newArrayList(
      "com.dablog/2011/10/04/boating",
      "com.dablog/2011/10/09/lasers",
      "com.jamiesrecipes", // this nub helped find a bug
      "com.jamiesrecipes/eggs");
  static String FAMILY = "hits";
  static List<String> BROWSERS = Lists.newArrayList(
      "Chrome", "IE8", "IE9beta");//, "Opera", "Safari");
  static long TIMESTAMP = 1234567890;

  static int MAX_VALUE = 50;

  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (String rowKey : URLS) {
      for (String qualifier : BROWSERS) {
        KeyValue kv = new KeyValue(
            Bytes.toBytes(rowKey),
            Bytes.toBytes(FAMILY),
            Bytes.toBytes(qualifier),
            TIMESTAMP,
            KeyValue.Type.Put,
            Bytes.toBytes("VvvV"));
        kvs.add(kv);
      }
    }
  }

  /**
   * Used for generating docs.
   */
  public static void main(String... args) throws IOException {
    System.out.println("-- inputs --");
    System.out.println(KeyValueTestUtil.toStringWithPadding(kvs, true));
    ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
    PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false);

    for (KeyValue kv : kvs) {
      encoder.write(kv);
    }
    encoder.flush();

    System.out.println("-- qualifier SortedPtBuilderNodes --");
    for (TokenizerNode tokenizer : encoder.getQualifierWriter().getNonLeaves()) {
      System.out.println(tokenizer);
    }
    for (TokenizerNode tokenizerNode : encoder.getQualifierWriter().getLeaves()) {
      System.out.println(tokenizerNode);
    }

    System.out.println("-- qualifier PtColumnNodeWriters --");
    for (ColumnNodeWriter writer : encoder.getQualifierWriter().getColumnNodeWriters()) {
      System.out.println(writer);
    }

    System.out.println("-- rowKey SortedPtBuilderNodes --");
    for (TokenizerNode tokenizerNode : encoder.getRowWriter().getNonLeaves()) {
      System.out.println(tokenizerNode);
    }
    for (TokenizerNode tokenizerNode : encoder.getRowWriter().getLeaves()) {
      System.out.println(tokenizerNode);
    }

    System.out.println("-- row PtRowNodeWriters --");
    for (RowNodeWriter writer : encoder.getRowWriter().getNonLeafWriters()) {
      System.out.println(writer);
    }
    for (RowNodeWriter writer : encoder.getRowWriter().getLeafWriters()) {
      System.out.println(writer);
    }

    System.out.println("-- concatenated values --");
    System.out.println(Bytes.toStringBinary(encoder.getValueByteRange().deepCopyToNewArray()));
  }

  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }

}
@@ -0,0 +1,45 @@
package org.apache.hbase.codec.prefixtree.timestamp;

import java.util.Collection;
import java.util.List;

import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataBasic;
import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataNumbers;
import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataRepeats;

import com.google.common.collect.Lists;

public interface TestTimestampData {

  List<Long> getInputs();
  long getMinimum();
  List<Long> getOutputs();

  public static class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      all.add(new Object[] { new TestTimestampDataBasic() });
      all.add(new Object[] { new TestTimestampDataNumbers() });
      all.add(new Object[] { new TestTimestampDataRepeats() });
      return all;
    }
  }
}
@@ -0,0 +1,92 @@
package org.apache.hbase.codec.prefixtree.timestamp;

import java.io.IOException;
import java.util.Collection;

import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@RunWith(Parameterized.class)
public class TestTimestampEncoder {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestTimestampData.InMemory().getAllAsObjectArray();
  }

  private TestTimestampData timestamps;
  private PrefixTreeBlockMeta blockMeta;
  private LongEncoder encoder;
  private byte[] bytes;
  private TimestampDecoder decoder;

  public TestTimestampEncoder(TestTimestampData testTimestamps) throws IOException {
    this.timestamps = testTimestamps;
    this.blockMeta = new PrefixTreeBlockMeta();
    this.blockMeta.setNumMetaBytes(0);
    this.blockMeta.setNumRowBytes(0);
    this.blockMeta.setNumQualifierBytes(0);
    this.encoder = new LongEncoder();
    for (Long ts : testTimestamps.getInputs()) {
      encoder.add(ts);
    }
    encoder.compile();
    blockMeta.setTimestampFields(encoder);
    bytes = encoder.getByteArray();
    decoder = new TimestampDecoder();
    decoder.initOnBlock(blockMeta, bytes);
  }

  @Test
  public void testCompressorMinimum() {
    Assert.assertEquals(timestamps.getMinimum(), encoder.getMin());
  }

  @Test
  public void testCompressorRoundTrip() {
    long[] outputs = encoder.getSortedUniqueTimestamps();
    for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
      long input = timestamps.getOutputs().get(i);
      long output = outputs[i];
      Assert.assertEquals(input, output);
    }
  }

  @Test
  public void testReaderMinimum() {
    Assert.assertEquals(timestamps.getMinimum(), decoder.getLong(0));
  }

  @Test
  public void testReaderRoundTrip() {
    for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
      long input = timestamps.getOutputs().get(i);
      long output = decoder.getLong(i);
      Assert.assertEquals(input, output);
    }
  }
}
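Illustration (not part of the patch): the encoder tests above expect the block to expose a minimum plus the sorted, de-duplicated timestamps. A sketch of that contract only (LongEncoder's actual byte layout is not reproduced here):

import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

public class TimestampDedupSketch {
  public static void main(String[] args) {
    long[] inputs = { 5L, 3L, 0L, 1L, 3L }; // same inputs as TestTimestampDataBasic

    TreeSet<Long> unique = new TreeSet<Long>();
    for (long ts : inputs) {
      unique.add(ts);
    }

    long min = unique.first();
    List<Long> sortedUnique = new ArrayList<Long>(unique);
    // each cell can then reference a small index into this table instead of a full long
    System.out.println("min=" + min);            // min=0
    System.out.println("table=" + sortedUnique); // table=[0, 1, 3, 5]
  }
}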
@@ -0,0 +1,54 @@
package org.apache.hbase.codec.prefixtree.timestamp.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData;

public class TestTimestampDataBasic implements TestTimestampData {

  @Override
  public List<Long> getInputs() {
    List<Long> d = new ArrayList<Long>();
    d.add(5L);
    d.add(3L);
    d.add(0L);
    d.add(1L);
    d.add(3L);
    return d;
  }

  @Override
  public long getMinimum() {
    return 0L;
  }

  @Override
  public List<Long> getOutputs() {
    List<Long> d = new ArrayList<Long>();
    d.add(0L);
    d.add(1L);
    d.add(3L);
    d.add(5L);
    return d;
  }

}
@@ -0,0 +1,56 @@
package org.apache.hbase.codec.prefixtree.timestamp.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData;

public class TestTimestampDataNumbers implements TestTimestampData {

  private int shift = 8;

  @Override
  public List<Long> getInputs() {
    List<Long> d = new ArrayList<Long>();
    d.add(5L << shift);
    d.add(3L << shift);
    d.add(7L << shift);
    d.add(1L << shift);
    d.add(3L << shift);
    return d;
  }

  @Override
  public long getMinimum() {
    return 1L << shift;
  }

  @Override
  public List<Long> getOutputs() {
    List<Long> d = new ArrayList<Long>();
    d.add(1L << shift);
    d.add(3L << shift);
    d.add(5L << shift);
    d.add(7L << shift);
    return d;
  }

}
@@ -0,0 +1,52 @@
package org.apache.hbase.codec.prefixtree.timestamp.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData;

public class TestTimestampDataRepeats implements TestTimestampData {

  private static long t = 1234567890L;

  @Override
  public List<Long> getInputs() {
    List<Long> d = new ArrayList<Long>();
    d.add(t);
    d.add(t);
    d.add(t);
    d.add(t);
    d.add(t);
    return d;
  }

  @Override
  public long getMinimum() {
    return t;
  }

  @Override
  public List<Long> getOutputs() {
    List<Long> d = new ArrayList<Long>();
    return d;
  }

}
@@ -0,0 +1,34 @@
package org.apache.hbase.util.bytes;

import junit.framework.Assert;

import org.apache.hadoop.hbase.util.ByteRange;
import org.junit.Test;

public class TestByteRange {

  @Test
  public void testConstructor() {
    ByteRange b = new ByteRange(new byte[] { 0, 1, 2 });
    Assert.assertEquals(3, b.getLength());
  }

}
@@ -0,0 +1,32 @@
package org.apache.hbase.util.comparator;

import java.util.Comparator;

import org.apache.hadoop.hbase.util.Bytes;

public class ByteArrayComparator implements Comparator<byte[]> {

  @Override
  public int compare(byte[] a, byte[] b) {
    return Bytes.compareTo(a, b);
  }

}
@@ -0,0 +1,33 @@
package org.apache.hbase.util.number;

import java.text.DecimalFormat;

public class NumberFormatter {

  public static String addCommas(final Number pValue) {
    if (pValue == null) {
      return null;
    }
    String format = "###,###,###,###,###,###,###,###.#####################";
    return new DecimalFormat(format).format(pValue);// biggest is 19 digits
  }

}
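Usage note (not part of the patch; output assumes a default locale whose grouping separator is ','):

import org.apache.hbase.util.number.NumberFormatter;

public class NumberFormatterDemo {
  public static void main(String[] args) {
    System.out.println(NumberFormatter.addCommas(1234567.25)); // 1,234,567.25
    System.out.println(NumberFormatter.addCommas(null));       // null
  }
}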
@@ -0,0 +1,34 @@
package org.apache.hbase.util.number;

import java.util.Random;

public class RandomNumberUtils {

  public static long nextPositiveLong(Random random) {
    while (true) {
      long value = random.nextLong();
      if (value > 0) {
        return value;
      }
    }
  }

}
@@ -0,0 +1,122 @@
package org.apache.hbase.util.vint;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.junit.Assert;
import org.junit.Test;

/********************** tests *************************/

public class TestFIntTool {
  @Test
  public void testLeadingZeros() {
    Assert.assertEquals(64, Long.numberOfLeadingZeros(0));
    Assert.assertEquals(63, Long.numberOfLeadingZeros(1));
    Assert.assertEquals(0, Long.numberOfLeadingZeros(Long.MIN_VALUE));
    Assert.assertEquals(0, Long.numberOfLeadingZeros(-1));
    Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE));
    Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE - 1));
  }

  @Test
  public void testMaxValueForNumBytes() {
    Assert.assertEquals(255, UFIntTool.maxValueForNumBytes(1));
    Assert.assertEquals(65535, UFIntTool.maxValueForNumBytes(2));
    Assert.assertEquals(0xffffff, UFIntTool.maxValueForNumBytes(3));
    Assert.assertEquals(0xffffffffffffffL, UFIntTool.maxValueForNumBytes(7));
  }

  @Test
  public void testNumBytes() {
    Assert.assertEquals(1, UFIntTool.numBytes(0));
    Assert.assertEquals(1, UFIntTool.numBytes(1));
    Assert.assertEquals(1, UFIntTool.numBytes(255));
    Assert.assertEquals(2, UFIntTool.numBytes(256));
    Assert.assertEquals(2, UFIntTool.numBytes(65535));
    Assert.assertEquals(3, UFIntTool.numBytes(65536));
    Assert.assertEquals(4, UFIntTool.numBytes(0xffffffffL));
    Assert.assertEquals(5, UFIntTool.numBytes(0x100000000L));
    Assert.assertEquals(4, UFIntTool.numBytes(Integer.MAX_VALUE));
    Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE));
    Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE - 1));
  }

  @Test
  public void testGetBytes() {
    Assert.assertArrayEquals(new byte[] { 0 }, UFIntTool.getBytes(1, 0));
    Assert.assertArrayEquals(new byte[] { 1 }, UFIntTool.getBytes(1, 1));
    Assert.assertArrayEquals(new byte[] { -1 }, UFIntTool.getBytes(1, 255));
    Assert.assertArrayEquals(new byte[] { 1, 0 }, UFIntTool.getBytes(2, 256));
    Assert.assertArrayEquals(new byte[] { 1, 3 }, UFIntTool.getBytes(2, 256 + 3));
    Assert.assertArrayEquals(new byte[] { 1, -128 }, UFIntTool.getBytes(2, 256 + 128));
    Assert.assertArrayEquals(new byte[] { 1, -1 }, UFIntTool.getBytes(2, 256 + 255));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 },
        UFIntTool.getBytes(4, Integer.MAX_VALUE));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 },
        UFIntTool.getBytes(8, Long.MAX_VALUE));
  }

  @Test
  public void testFromBytes() {
    Assert.assertEquals(0, UFIntTool.fromBytes(new byte[] { 0 }));
    Assert.assertEquals(1, UFIntTool.fromBytes(new byte[] { 1 }));
    Assert.assertEquals(255, UFIntTool.fromBytes(new byte[] { -1 }));
    Assert.assertEquals(256, UFIntTool.fromBytes(new byte[] { 1, 0 }));
    Assert.assertEquals(256 + 3, UFIntTool.fromBytes(new byte[] { 1, 3 }));
    Assert.assertEquals(256 + 128, UFIntTool.fromBytes(new byte[] { 1, -128 }));
    Assert.assertEquals(256 + 255, UFIntTool.fromBytes(new byte[] { 1, -1 }));
    Assert.assertEquals(Integer.MAX_VALUE, UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1 }));
    Assert.assertEquals(Long.MAX_VALUE,
        UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }));
  }

  @Test
  public void testRoundTrips() {
    long[] values = new long[] { 0, 1, 2, 255, 256, 31123, 65535, 65536, 65537, 0xfffffeL,
        0xffffffL, 0x1000000L, 0x1000001L, Integer.MAX_VALUE - 1, Integer.MAX_VALUE,
        (long) Integer.MAX_VALUE + 1, Long.MAX_VALUE - 1, Long.MAX_VALUE };
    for (int i = 0; i < values.length; ++i) {
      Assert.assertEquals(values[i], UFIntTool.fromBytes(UFIntTool.getBytes(8, values[i])));
    }
  }

  @Test
  public void testWriteBytes() throws IOException {// copied from testGetBytes
    Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(1, 0));
    Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1, 1));
    Assert.assertArrayEquals(new byte[] { -1 }, bytesViaOutputStream(1, 255));
    Assert.assertArrayEquals(new byte[] { 1, 0 }, bytesViaOutputStream(2, 256));
    Assert.assertArrayEquals(new byte[] { 1, 3 }, bytesViaOutputStream(2, 256 + 3));
    Assert.assertArrayEquals(new byte[] { 1, -128 }, bytesViaOutputStream(2, 256 + 128));
    Assert.assertArrayEquals(new byte[] { 1, -1 }, bytesViaOutputStream(2, 256 + 255));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 },
        bytesViaOutputStream(4, Integer.MAX_VALUE));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 },
        bytesViaOutputStream(8, Long.MAX_VALUE));
  }

  private byte[] bytesViaOutputStream(int outputWidth, long value) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    UFIntTool.writeBytes(outputWidth, value, os);
    return os.toByteArray();
  }
}
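Illustration (not part of the patch): the byte patterns asserted above match an unsigned, big-endian, fixed-width encoding. A minimal sketch of that arithmetic (not the UFIntTool source):

import java.util.Arrays;

public class FixedWidthIntSketch {
  static byte[] encode(int width, long value) {
    byte[] out = new byte[width];
    for (int i = width - 1; i >= 0; --i) { // least-significant byte written last
      out[i] = (byte) (value & 0xff);
      value >>>= 8;
    }
    return out;
  }

  static long decode(byte[] bytes) {
    long value = 0;
    for (byte b : bytes) {
      value = (value << 8) | (b & 0xff);
    }
    return value;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(encode(2, 256 + 3))); // [1, 3]
    System.out.println(decode(new byte[] { 1, -128 }));      // 384, i.e. 256 + 128
  }
}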
@@ -0,0 +1,98 @@
package org.apache.hbase.util.vint;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestVIntTool {
|
||||
|
||||
@Test
|
||||
public void testNumBytes() {
|
||||
Assert.assertEquals(1, UVIntTool.numBytes(0));
|
||||
Assert.assertEquals(1, UVIntTool.numBytes(1));
|
||||
Assert.assertEquals(1, UVIntTool.numBytes(100));
|
||||
Assert.assertEquals(1, UVIntTool.numBytes(126));
|
||||
Assert.assertEquals(1, UVIntTool.numBytes(127));
|
||||
Assert.assertEquals(2, UVIntTool.numBytes(128));
|
||||
Assert.assertEquals(2, UVIntTool.numBytes(129));
|
||||
Assert.assertEquals(5, UVIntTool.numBytes(Integer.MAX_VALUE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteBytes() throws IOException {
|
||||
Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(0));
|
||||
Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1));
|
||||
Assert.assertArrayEquals(new byte[] { 63 }, bytesViaOutputStream(63));
|
||||
Assert.assertArrayEquals(new byte[] { 127 }, bytesViaOutputStream(127));
|
||||
Assert.assertArrayEquals(new byte[] { -128, 1 }, bytesViaOutputStream(128));
|
||||
Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, bytesViaOutputStream(155));
|
||||
Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, bytesViaOutputStream(Integer.MAX_VALUE));
|
||||
}
|
||||
|
||||
private byte[] bytesViaOutputStream(int value) throws IOException {
|
||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
UVIntTool.writeBytes(value, os);
|
||||
return os.toByteArray();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToBytes() {
|
||||
Assert.assertArrayEquals(new byte[] { 0 }, UVIntTool.getBytes(0));
|
||||
Assert.assertArrayEquals(new byte[] { 1 }, UVIntTool.getBytes(1));
|
||||
Assert.assertArrayEquals(new byte[] { 63 }, UVIntTool.getBytes(63));
|
||||
Assert.assertArrayEquals(new byte[] { 127 }, UVIntTool.getBytes(127));
|
||||
Assert.assertArrayEquals(new byte[] { -128, 1 }, UVIntTool.getBytes(128));
|
||||
Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVIntTool.getBytes(155));
|
||||
Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, UVIntTool.getBytes(Integer.MAX_VALUE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromBytes() {
|
||||
Assert.assertEquals(Integer.MAX_VALUE, UVIntTool.getInt(UVIntTool.MAX_VALUE_BYTES));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRoundTrips() {
|
||||
Random random = new Random();
|
||||
for (int i = 0; i < 10000; ++i) {
|
||||
int value = random.nextInt(Integer.MAX_VALUE);
|
||||
byte[] bytes = UVIntTool.getBytes(value);
|
||||
int roundTripped = UVIntTool.getInt(bytes);
|
||||
Assert.assertEquals(value, roundTripped);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInputStreams() throws IOException {
|
||||
ByteArrayInputStream is;
|
||||
is = new ByteArrayInputStream(new byte[] { 0 });
|
||||
Assert.assertEquals(0, UVIntTool.getInt(is));
|
||||
is = new ByteArrayInputStream(new byte[] { 5 });
|
||||
Assert.assertEquals(5, UVIntTool.getInt(is));
|
||||
is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
|
||||
Assert.assertEquals(155, UVIntTool.getInt(is));
|
||||
}
|
||||
|
||||
}
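
The assertions above pin down the layout UVIntTool is expected to produce: an unsigned varint with seven data bits per byte, least-significant group first, and the high bit set on every byte except the last (so 155 encodes as {-128 + 27, 1} and Integer.MAX_VALUE needs five bytes). The standalone sketch below illustrates that layout; it is an assumption drawn from the test vectors, not the UVIntTool source, and the class name UVIntSketch is hypothetical.

import java.io.ByteArrayOutputStream;

public class UVIntSketch {

  // Count 7-bit groups needed for an unsigned int.
  public static int numBytes(int value) {
    int n = 1;
    while ((value >>>= 7) != 0) {
      ++n;
    }
    return n;
  }

  // Emit low 7 bits first; set the high bit on every byte except the last.
  public static byte[] getBytes(int value) {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    while ((value & ~0x7F) != 0) {
      os.write((value & 0x7F) | 0x80); // more bytes follow
      value >>>= 7;
    }
    os.write(value); // final byte, high bit clear
    return os.toByteArray();
  }

  // Reassemble the 7-bit groups until a byte with the high bit clear is seen.
  public static int getInt(byte[] bytes) {
    int value = 0;
    for (int i = 0; i < bytes.length; ++i) {
      value |= (bytes[i] & 0x7F) << (7 * i);
      if ((bytes[i] & 0x80) == 0) {
        break;
      }
    }
    return value;
  }

  public static void main(String[] args) {
    // 155 -> { -128 + 27, 1 }, matching testToBytes() above.
    byte[] encoded = getBytes(155);
    System.out.println(encoded.length + " bytes, decodes to " + getInt(encoded));
  }
}

Running main() prints "2 bytes, decodes to 155", consistent with the testToBytes() and testInputStreams() vectors.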

@@ -0,0 +1,105 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hbase.util.vint;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Random;

import org.apache.hbase.util.number.RandomNumberUtils;
import org.junit.Assert;
import org.junit.Test;

public class TestVLongTool {

  @Test
  public void testNumBytes() {
    Assert.assertEquals(1, UVLongTool.numBytes(0));
    Assert.assertEquals(1, UVLongTool.numBytes(1));
    Assert.assertEquals(1, UVLongTool.numBytes(100));
    Assert.assertEquals(1, UVLongTool.numBytes(126));
    Assert.assertEquals(1, UVLongTool.numBytes(127));
    Assert.assertEquals(2, UVLongTool.numBytes(128));
    Assert.assertEquals(2, UVLongTool.numBytes(129));
    Assert.assertEquals(9, UVLongTool.numBytes(Long.MAX_VALUE));
  }

  @Test
  public void testToBytes() {
    Assert.assertArrayEquals(new byte[] { 0 }, UVLongTool.getBytes(0));
    Assert.assertArrayEquals(new byte[] { 1 }, UVLongTool.getBytes(1));
    Assert.assertArrayEquals(new byte[] { 63 }, UVLongTool.getBytes(63));
    Assert.assertArrayEquals(new byte[] { 127 }, UVLongTool.getBytes(127));
    Assert.assertArrayEquals(new byte[] { -128, 1 }, UVLongTool.getBytes(128));
    Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVLongTool.getBytes(155));
    Assert.assertArrayEquals(UVLongTool.MAX_VALUE_BYTES, UVLongTool.getBytes(Long.MAX_VALUE));
  }

  @Test
  public void testFromBytes() {
    Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES));
  }

  @Test
  public void testFromBytesOffset() {
    Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES, 0));

    long ms = 1318966363481L;
    // System.out.println(ms);
    byte[] bytes = UVLongTool.getBytes(ms);
    // System.out.println(Arrays.toString(bytes));
    long roundTripped = UVLongTool.getLong(bytes, 0);
    Assert.assertEquals(ms, roundTripped);

    int calculatedNumBytes = UVLongTool.numBytes(ms);
    int actualNumBytes = bytes.length;
    Assert.assertEquals(actualNumBytes, calculatedNumBytes);

    byte[] shiftedBytes = new byte[1000];
    int shift = 33;
    System.arraycopy(bytes, 0, shiftedBytes, shift, bytes.length);
    long shiftedRoundTrip = UVLongTool.getLong(shiftedBytes, shift);
    Assert.assertEquals(ms, shiftedRoundTrip);
  }

  @Test
  public void testRoundTrips() {
    Random random = new Random();
    for (int i = 0; i < 10000; ++i) {
      long value = RandomNumberUtils.nextPositiveLong(random);
      byte[] bytes = UVLongTool.getBytes(value);
      long roundTripped = UVLongTool.getLong(bytes);
      Assert.assertEquals(value, roundTripped);
      int calculatedNumBytes = UVLongTool.numBytes(value);
      int actualNumBytes = bytes.length;
      Assert.assertEquals(actualNumBytes, calculatedNumBytes);
    }
  }

  @Test
  public void testInputStreams() throws IOException {
    ByteArrayInputStream is;
    is = new ByteArrayInputStream(new byte[] { 0 });
    Assert.assertEquals(0, UVLongTool.getLong(is));
    is = new ByteArrayInputStream(new byte[] { 5 });
    Assert.assertEquals(5, UVLongTool.getLong(is));
    is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
    Assert.assertEquals(155, UVLongTool.getLong(is));
  }
}
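
testFromBytesOffset() above additionally exercises decoding from an arbitrary offset inside a larger buffer. Below is a minimal sketch of that offset-based decode under the same assumed 7-bit layout; it is an illustration only, and UVLongSketch is a hypothetical name, not the UVLongTool implementation.

public class UVLongSketch {

  // Decode an unsigned varint long starting at the given array offset.
  public static long getLong(byte[] bytes, int offset) {
    long value = 0;
    for (int i = 0; ; ++i) {
      byte b = bytes[offset + i];
      value |= (b & 0x7FL) << (7 * i);
      if ((b & 0x80) == 0) { // high bit clear marks the final byte
        break;
      }
    }
    return value;
  }

  public static void main(String[] args) {
    // { -128 + 27, 1 } decodes to 155 even when shifted into a larger buffer,
    // mirroring the shiftedBytes round trip in testFromBytesOffset().
    byte[] shifted = new byte[10];
    shifted[3] = -128 + 27;
    shifted[4] = 1;
    System.out.println(getLong(shifted, 3)); // prints 155
  }
}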

@@ -282,6 +282,12 @@
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-prefix-tree</artifactId>
      <!-- unfortunately, runtime scope causes eclipse to put it in the compile time classpath -->
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>

@@ -112,9 +112,10 @@ import org.hbase.async.Scanner;
public class PerformanceEvaluation extends Configured implements Tool {
  protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());

  private static final int ROW_LENGTH = 1000;
  private static final int DEFAULT_ROW_PREFIX_LENGTH = 16;
  private static final int VALUE_LENGTH = 1000;
  private static final int ONE_GB = 1024 * 1024 * 1000;
  private static final int ROWS_PER_GB = ONE_GB / ROW_LENGTH;
  private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH;

  public static final byte[] COMPRESSION = Bytes.toBytes("NONE");
  public static final byte[] TABLE_NAME = Bytes.toBytes("TestTable");

@@ -127,6 +128,7 @@ public class PerformanceEvaluation extends Configured implements Tool {

  private boolean miniCluster = false;
  private boolean nomapred = false;
  private int rowPrefixLength = DEFAULT_ROW_PREFIX_LENGTH;
  private int N = 1;
  private int R = ROWS_PER_GB;
  private byte[] tableName = TABLE_NAME;

@@ -537,10 +539,11 @@ public class PerformanceEvaluation extends Configured implements Tool {
    if (this.presplitRegions == 0)
      return new byte [0][];

    byte[][] splits = new byte[this.presplitRegions][];
    int numSplitPoints = presplitRegions - 1;
    byte[][] splits = new byte[numSplitPoints][];
    int jump = this.R / this.presplitRegions;
    for (int i=0; i <this.presplitRegions; i++) {
      int rowkey = jump * i;
    for (int i=0; i < numSplitPoints; i++) {
      int rowkey = jump * (1 + i);
      splits[i] = format(rowkey);
    }
    return splits;
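
The hunk above fixes an off-by-one in the pre-split calculation: presplitRegions regions are separated by presplitRegions - 1 interior split keys, and the first key should land at jump rather than at row 0. The standalone sketch below shows the corrected arithmetic; the helper names are illustrative, not the PerformanceEvaluation code itself.

public class PresplitSketch {

  // N regions are separated by N - 1 interior split keys.
  static int[] splitPoints(int totalRows, int presplitRegions) {
    int numSplitPoints = presplitRegions - 1;
    int[] splits = new int[numSplitPoints];
    int jump = totalRows / presplitRegions;
    for (int i = 0; i < numSplitPoints; i++) {
      // (1 + i) * jump skips row 0; jump * i would emit 0 as a split key
      // and produce one boundary too many.
      splits[i] = jump * (1 + i);
    }
    return splits;
  }

  public static void main(String[] args) {
    // 1,000,000 rows into 4 regions -> split keys at 250000, 500000, 750000.
    for (int split : splitPoints(1000000, 4)) {
      System.out.println(split);
    }
  }
}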

@@ -931,9 +934,9 @@ public class PerformanceEvaluation extends Configured implements Tool {
      if (row.size() != 1) {
        throw new IOException((row.isEmpty() ? "No" : "Multiple (" + row.size() + ')')
            + " KeyValue found in row");
      } else if (row.get(0).value().length != ROW_LENGTH) {
      } else if (row.get(0).value().length != VALUE_LENGTH) {
        throw new IOException("Invalid value length (found: " + row.get(0).value().length
            + ", expected: " + ROW_LENGTH + ") in row \""
            + ", expected: " + VALUE_LENGTH + ") in row \""
            + new String(row.get(0).key()) + '"');
      }
    }

@@ -1420,7 +1423,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
   * number (Does absolute in case number is negative).
   */
  public static byte [] format(final int number) {
    byte [] b = new byte[10];
    byte [] b = new byte[DEFAULT_ROW_PREFIX_LENGTH + 10];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte)((d % 10) + '0');

@@ -1436,10 +1439,10 @@ public class PerformanceEvaluation extends Configured implements Tool {
   * @return Generated random value to insert into a table cell.
   */
  public static byte[] generateValue(final Random r) {
    byte [] b = new byte [ROW_LENGTH];
    byte [] b = new byte [VALUE_LENGTH];
    int i = 0;

    for(i = 0; i < (ROW_LENGTH-8); i += 8) {
    for(i = 0; i < (VALUE_LENGTH-8); i += 8) {
      b[i] = (byte) (65 + r.nextInt(26));
      b[i+1] = b[i];
      b[i+2] = b[i];

@@ -1451,7 +1454,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
    }

    byte a = (byte) (65 + r.nextInt(26));
    for(; i < ROW_LENGTH; i++) {
    for(; i < VALUE_LENGTH; i++) {
      b[i] = a;
    }
    return b;

@@ -105,15 +105,15 @@ public class TestEncodedSeekers {

    //write the data, but leave some in the memstore
    doPuts(region);


    //verify correctness when memstore contains data
    doGets(region);


    //verify correctness again after compacting
    region.compactStores();
    doGets(region);



    Map<DataBlockEncoding, Integer> encodingCounts = cache.getEncodingCountsForTest();

    // Ensure that compactions don't pollute the cache with unencoded blocks

@@ -124,8 +124,8 @@ public class TestEncodedSeekers {
    assertEquals(encoding, encodingInCache);
    assertTrue(encodingCounts.get(encodingInCache) > 0);
  }




  private void doPuts(HRegion region) throws IOException{
    LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE);
    for (int i = 0; i < NUM_ROWS; ++i) {

@@ -146,8 +146,8 @@ public class TestEncodedSeekers {
      }
    }
  }




  private void doGets(HRegion region) throws IOException{
    for (int i = 0; i < NUM_ROWS; ++i) {
      final byte[] rowKey = LoadTestKVGenerator.md5PrefixedKey(i).getBytes();

pom.xml

@@ -42,7 +42,7 @@
  <version>0.95-SNAPSHOT</version>
  <name>HBase</name>
  <description>
    Apache HBase™ is the &lt;a href="http://hadoop.apache.org"&rt;Hadoop</a&rt; database. Use it when you need
    Apache HBase is the &lt;a href="http://hadoop.apache.org"&rt;Hadoop</a&rt; database. Use it when you need
    random, realtime read/write access to your Big Data.
    This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters
    of commodity hardware.

@@ -56,6 +56,7 @@
    <module>hbase-common</module>
    <module>hbase-it</module>
    <module>hbase-examples</module>
    <module>hbase-prefix-tree</module>
  </modules>
  <scm>
    <connection>scm:svn:http://svn.apache.org/repos/asf/hbase/trunk</connection>

@@ -978,6 +979,14 @@
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-prefix-tree</artifactId>
      <version>${project.version}</version>
      <!-- unfortunately, runtime scope causes Eclipse to give compile time access which isn't
        needed, however it is apparently needed to run things within Eclipse -->
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <artifactId>hbase-examples</artifactId>
      <groupId>org.apache.hbase</groupId>