HBASE-19179 Remove hbase-prefix-tree

Michael Stack 2017-11-03 20:10:36 -07:00
parent cdff80d976
commit f8c58930aa
GPG Key ID: 9816C7FC8ACC93D2
103 changed files with 8 additions and 12052 deletions

View File

@@ -39,7 +39,7 @@ public enum DataBlockEncoding {
FAST_DIFF(4, "org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder"),
// id 5 is reserved for the COPY_KEY algorithm for benchmarking
// COPY_KEY(5, "org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder"),
PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec"),
// PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec"),
ROW_INDEX_V1(7, "org.apache.hadoop.hbase.io.encoding.RowIndexCodecV1");
private final short id;
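
With PREFIX_TREE commented out of the enum, any table whose column families still use that encoding has to be switched to one of the remaining encodings (for example FAST_DIFF or ROW_INDEX_V1) before running on a build that includes this change. Below is a minimal sketch of such a switch using the standard HBase 2.x client API; the table name, family name, and the choice of FAST_DIFF are illustrative assumptions, not part of this commit.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;

public class SwitchOffPrefixTree {
  public static void main(String[] args) throws Exception {
    TableName table = TableName.valueOf("my_table");      // hypothetical table
    byte[] family = Bytes.toBytes("cf");                  // hypothetical family
    try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
         Admin admin = conn.getAdmin()) {
      // Copy the existing family descriptor and change only the encoding.
      ColumnFamilyDescriptor current = admin.getDescriptor(table).getColumnFamily(family);
      ColumnFamilyDescriptor updated = ColumnFamilyDescriptorBuilder.newBuilder(current)
          .setDataBlockEncoding(DataBlockEncoding.FAST_DIFF)
          .build();
      // Existing HFiles are rewritten with the new encoding on the next major compaction.
      admin.modifyColumnFamily(table, updated);
    }
  }
}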

View File

@@ -179,10 +179,6 @@
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-prefix-tree</artifactId>
</dependency>
<dependency>
<groupId>org.apache.htrace</groupId>
<artifactId>htrace-core</artifactId>

View File

@@ -801,21 +801,6 @@ public class TableMapReduceUtil {
* @see <a href="https://issues.apache.org/jira/browse/PIG-3285">PIG-3285</a>
*/
public static void addHBaseDependencyJars(Configuration conf) throws IOException {
// PrefixTreeCodec is part of the hbase-prefix-tree module. If not included in MR jobs jar
// dependencies, MR jobs that write encoded hfiles will fail.
// We used reflection here so to prevent a circular module dependency.
// TODO - if we extract the MR into a module, make it depend on hbase-prefix-tree.
Class prefixTreeCodecClass = null;
try {
prefixTreeCodecClass =
Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");
} catch (ClassNotFoundException e) {
// this will show up in unit tests but should not show in real deployments
LOG.warn("The hbase-prefix-tree module jar containing PrefixTreeCodec is not present." +
" Continuing without it.");
}
addDependencyJarsForClasses(conf,
// explicitly pull a class from each module
org.apache.hadoop.hbase.HConstants.class, // hbase-common
@@ -828,8 +813,6 @@ public class TableMapReduceUtil {
org.apache.hadoop.hbase.mapreduce.TableMapper.class, // hbase-mapreduce
org.apache.hadoop.hbase.metrics.impl.FastLongHistogram.class, // hbase-metrics
org.apache.hadoop.hbase.metrics.Snapshot.class, // hbase-metrics-api
prefixTreeCodecClass, // hbase-prefix-tree (if null will be skipped)
// pull necessary dependencies
org.apache.zookeeper.ZooKeeper.class,
org.apache.hadoop.hbase.shaded.io.netty.channel.Channel.class,
com.google.protobuf.Message.class,
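
The block removed above resolved PrefixTreeCodec by name so that the MapReduce utilities did not need a compile-time dependency on hbase-prefix-tree, and handed a possibly-null Class to addDependencyJarsForClasses, which skipped it. A self-contained sketch of that optional-dependency pattern follows; the class names and the loadIfPresent helper are illustrative, not HBase APIs.

import java.util.ArrayList;
import java.util.List;

public class OptionalClassExample {
  // Returns the class if it is on the classpath, otherwise null.
  static Class<?> loadIfPresent(String className) {
    try {
      return Class.forName(className);
    } catch (ClassNotFoundException e) {
      return null; // optional module is absent; caller simply skips it
    }
  }

  public static void main(String[] args) {
    List<Class<?>> jarSources = new ArrayList<>();
    for (Class<?> c : new Class<?>[] {
        String.class,                                    // always present
        loadIfPresent("org.example.OptionalCodec") }) {  // hypothetical optional class
      if (c != null) {                                   // null entries are skipped
        jarSources.add(c);
      }
    }
    System.out.println("Classes whose jars would be shipped: " + jarSources);
  }
}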

View File

@@ -1,189 +0,0 @@
<?xml version="1.0"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>hbase-build-configuration</artifactId>
<groupId>org.apache.hbase</groupId>
<version>2.0.0-alpha4-SNAPSHOT</version>
<relativePath>../hbase-build-configuration</relativePath>
</parent>
<artifactId>hbase-prefix-tree</artifactId>
<name>Apache HBase - Prefix Tree</name>
<description>Prefix Tree Data Block Encoder</description>
<!--REMOVE-->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<!-- Make a jar and put the sources in the jar -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
</plugin>
<plugin>
<!--Make it so assembly:single does nothing in here-->
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<skipAssembly>true</skipAssembly>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings
only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<versionRange>[3.2,)</versionRange>
<goals>
<goal>compile</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-annotations</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase.thirdparty</groupId>
<artifactId>hbase-shaded-miscellaneous</artifactId>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>
<!-- Skip the tests in this module -->
<profile>
<id>skipPrefixTreeTests</id>
<activation>
<property>
<name>skipPrefixTreeTests</name>
</property>
</activation>
<properties>
<surefire.skipFirstPart>true</surefire.skipFirstPart>
<surefire.skipSecondPart>true</surefire.skipSecondPart>
</properties>
</profile>
<!-- Profiles for building against different hadoop versions -->
<!--
profile for building against Hadoop 2.0.0-alpha. Activate using:
mvn -Dhadoop.profile=2.0
-->
<profile>
<id>hadoop-2.0</id>
<activation>
<property>
<!--Below formatting for dev-support/generate-hadoopX-poms.sh-->
<!--h2--><name>!hadoop.profile</name>
</property>
</activation>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<!--
profile for building against Hadoop 3.0.x. Activate using:
mvn -Dhadoop.profile=3.0
-->
<profile>
<id>hadoop-3.0</id>
<activation>
<property>
<name>hadoop.profile</name>
<value>3.0</value>
</property>
</activation>
<properties>
<hadoop.version>3.0-SNAPSHOT</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
</profiles>
</project>

View File

@@ -1,899 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
import org.apache.hadoop.hbase.util.vint.UVLongTool;
/**
* Information about the block. Stored at the beginning of the byte[]. Contains things
* like minimum timestamp and width of FInts in the row tree.
*
* Most fields stored in VInts that get decoded on the first access of each new block.
*/
@InterfaceAudience.Private
public class PrefixTreeBlockMeta {
/******************* static fields ********************/
public static final int VERSION = 0;
public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue
public static final int
NUM_LONGS = 2,
NUM_INTS = 28,
NUM_SHORTS = 0,//keyValueTypeWidth not persisted
NUM_SINGLE_BYTES = 2,
MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS
+ Bytes.SIZEOF_SHORT * NUM_SHORTS
+ Bytes.SIZEOF_INT * NUM_INTS
+ NUM_SINGLE_BYTES;
/**************** transient fields *********************/
protected int bufferOffset;
/**************** persisted fields **********************/
// PrefixTree version to allow future format modifications
protected int version;
protected int numMetaBytes;
protected int numKeyValueBytes;
protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte
// split the byte[] into 6 sections for the different data types
protected int numRowBytes;
protected int numFamilyBytes;
protected int numQualifierBytes;
protected int numTimestampBytes;
protected int numMvccVersionBytes;
protected int numValueBytes;
protected int numTagsBytes;
// number of bytes in each section of fixed width FInts
protected int nextNodeOffsetWidth;
protected int familyOffsetWidth;
protected int qualifierOffsetWidth;
protected int timestampIndexWidth;
protected int mvccVersionIndexWidth;
protected int valueOffsetWidth;
protected int valueLengthWidth;
protected int tagsOffsetWidth;
// used to pre-allocate structures for reading
protected int rowTreeDepth;
protected int maxRowLength;
protected int maxQualifierLength;
protected int maxTagsLength;
// the timestamp from which the deltas are calculated
protected long minTimestamp;
protected int timestampDeltaWidth;
protected long minMvccVersion;
protected int mvccVersionDeltaWidth;
protected boolean allSameType;
protected byte allTypes;
protected int numUniqueRows;
protected int numUniqueFamilies;
protected int numUniqueQualifiers;
protected int numUniqueTags;
/***************** constructors ********************/
public PrefixTreeBlockMeta() {
}
public PrefixTreeBlockMeta(InputStream is) throws IOException{
this.version = VERSION;
this.bufferOffset = 0;
readVariableBytesFromInputStream(is);
}
/**
* @param buffer positioned at start of PtBlockMeta
*/
public PrefixTreeBlockMeta(ByteBuff buffer) {
initOnBlock(buffer);
}
public void initOnBlock(ByteBuff buffer) {
bufferOffset = buffer.position();
readVariableBytesFromBuffer(buffer, bufferOffset);
}
/**************** operate on each field **********************/
public int calculateNumMetaBytes(){
int numBytes = 0;
numBytes += UVIntTool.numBytes(version);
numBytes += UVLongTool.numBytes(numMetaBytes);
numBytes += UVIntTool.numBytes(numKeyValueBytes);
++numBytes;//os.write(getIncludesMvccVersion());
numBytes += UVIntTool.numBytes(numRowBytes);
numBytes += UVIntTool.numBytes(numFamilyBytes);
numBytes += UVIntTool.numBytes(numQualifierBytes);
numBytes += UVIntTool.numBytes(numTagsBytes);
numBytes += UVIntTool.numBytes(numTimestampBytes);
numBytes += UVIntTool.numBytes(numMvccVersionBytes);
numBytes += UVIntTool.numBytes(numValueBytes);
numBytes += UVIntTool.numBytes(nextNodeOffsetWidth);
numBytes += UVIntTool.numBytes(familyOffsetWidth);
numBytes += UVIntTool.numBytes(qualifierOffsetWidth);
numBytes += UVIntTool.numBytes(tagsOffsetWidth);
numBytes += UVIntTool.numBytes(timestampIndexWidth);
numBytes += UVIntTool.numBytes(mvccVersionIndexWidth);
numBytes += UVIntTool.numBytes(valueOffsetWidth);
numBytes += UVIntTool.numBytes(valueLengthWidth);
numBytes += UVIntTool.numBytes(rowTreeDepth);
numBytes += UVIntTool.numBytes(maxRowLength);
numBytes += UVIntTool.numBytes(maxQualifierLength);
numBytes += UVIntTool.numBytes(maxTagsLength);
numBytes += UVLongTool.numBytes(minTimestamp);
numBytes += UVIntTool.numBytes(timestampDeltaWidth);
numBytes += UVLongTool.numBytes(minMvccVersion);
numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth);
++numBytes;//os.write(getAllSameTypeByte());
++numBytes;//os.write(allTypes);
numBytes += UVIntTool.numBytes(numUniqueRows);
numBytes += UVIntTool.numBytes(numUniqueFamilies);
numBytes += UVIntTool.numBytes(numUniqueQualifiers);
numBytes += UVIntTool.numBytes(numUniqueTags);
return numBytes;
}
public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{
UVIntTool.writeBytes(version, os);
UVIntTool.writeBytes(numMetaBytes, os);
UVIntTool.writeBytes(numKeyValueBytes, os);
os.write(getIncludesMvccVersionByte());
UVIntTool.writeBytes(numRowBytes, os);
UVIntTool.writeBytes(numFamilyBytes, os);
UVIntTool.writeBytes(numQualifierBytes, os);
UVIntTool.writeBytes(numTagsBytes, os);
UVIntTool.writeBytes(numTimestampBytes, os);
UVIntTool.writeBytes(numMvccVersionBytes, os);
UVIntTool.writeBytes(numValueBytes, os);
UVIntTool.writeBytes(nextNodeOffsetWidth, os);
UVIntTool.writeBytes(familyOffsetWidth, os);
UVIntTool.writeBytes(qualifierOffsetWidth, os);
UVIntTool.writeBytes(tagsOffsetWidth, os);
UVIntTool.writeBytes(timestampIndexWidth, os);
UVIntTool.writeBytes(mvccVersionIndexWidth, os);
UVIntTool.writeBytes(valueOffsetWidth, os);
UVIntTool.writeBytes(valueLengthWidth, os);
UVIntTool.writeBytes(rowTreeDepth, os);
UVIntTool.writeBytes(maxRowLength, os);
UVIntTool.writeBytes(maxQualifierLength, os);
UVIntTool.writeBytes(maxTagsLength, os);
UVLongTool.writeBytes(minTimestamp, os);
UVIntTool.writeBytes(timestampDeltaWidth, os);
UVLongTool.writeBytes(minMvccVersion, os);
UVIntTool.writeBytes(mvccVersionDeltaWidth, os);
os.write(getAllSameTypeByte());
os.write(allTypes);
UVIntTool.writeBytes(numUniqueRows, os);
UVIntTool.writeBytes(numUniqueFamilies, os);
UVIntTool.writeBytes(numUniqueQualifiers, os);
UVIntTool.writeBytes(numUniqueTags, os);
}
public void readVariableBytesFromInputStream(InputStream is) throws IOException{
version = UVIntTool.getInt(is);
numMetaBytes = UVIntTool.getInt(is);
numKeyValueBytes = UVIntTool.getInt(is);
setIncludesMvccVersion((byte) is.read());
numRowBytes = UVIntTool.getInt(is);
numFamilyBytes = UVIntTool.getInt(is);
numQualifierBytes = UVIntTool.getInt(is);
numTagsBytes = UVIntTool.getInt(is);
numTimestampBytes = UVIntTool.getInt(is);
numMvccVersionBytes = UVIntTool.getInt(is);
numValueBytes = UVIntTool.getInt(is);
nextNodeOffsetWidth = UVIntTool.getInt(is);
familyOffsetWidth = UVIntTool.getInt(is);
qualifierOffsetWidth = UVIntTool.getInt(is);
tagsOffsetWidth = UVIntTool.getInt(is);
timestampIndexWidth = UVIntTool.getInt(is);
mvccVersionIndexWidth = UVIntTool.getInt(is);
valueOffsetWidth = UVIntTool.getInt(is);
valueLengthWidth = UVIntTool.getInt(is);
rowTreeDepth = UVIntTool.getInt(is);
maxRowLength = UVIntTool.getInt(is);
maxQualifierLength = UVIntTool.getInt(is);
maxTagsLength = UVIntTool.getInt(is);
minTimestamp = UVLongTool.getLong(is);
timestampDeltaWidth = UVIntTool.getInt(is);
minMvccVersion = UVLongTool.getLong(is);
mvccVersionDeltaWidth = UVIntTool.getInt(is);
setAllSameType((byte) is.read());
allTypes = (byte) is.read();
numUniqueRows = UVIntTool.getInt(is);
numUniqueFamilies = UVIntTool.getInt(is);
numUniqueQualifiers = UVIntTool.getInt(is);
numUniqueTags = UVIntTool.getInt(is);
}
public void readVariableBytesFromBuffer(ByteBuff buf, int offset) {
int position = offset;
version = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(version);
numMetaBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numMetaBytes);
numKeyValueBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numKeyValueBytes);
setIncludesMvccVersion(buf.get(position));
++position;
numRowBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numRowBytes);
numFamilyBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numFamilyBytes);
numQualifierBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numQualifierBytes);
numTagsBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numTagsBytes);
numTimestampBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numTimestampBytes);
numMvccVersionBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numMvccVersionBytes);
numValueBytes = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numValueBytes);
nextNodeOffsetWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(nextNodeOffsetWidth);
familyOffsetWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(familyOffsetWidth);
qualifierOffsetWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(qualifierOffsetWidth);
tagsOffsetWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(tagsOffsetWidth);
timestampIndexWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(timestampIndexWidth);
mvccVersionIndexWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(mvccVersionIndexWidth);
valueOffsetWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(valueOffsetWidth);
valueLengthWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(valueLengthWidth);
rowTreeDepth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(rowTreeDepth);
maxRowLength = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(maxRowLength);
maxQualifierLength = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(maxQualifierLength);
maxTagsLength = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(maxTagsLength);
minTimestamp = UVLongTool.getLong(buf, position);
position += UVLongTool.numBytes(minTimestamp);
timestampDeltaWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(timestampDeltaWidth);
minMvccVersion = UVLongTool.getLong(buf, position);
position += UVLongTool.numBytes(minMvccVersion);
mvccVersionDeltaWidth = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(mvccVersionDeltaWidth);
setAllSameType(buf.get(position));
++position;
allTypes = buf.get(position);
++position;
numUniqueRows = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numUniqueRows);
numUniqueFamilies = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numUniqueFamilies);
numUniqueQualifiers = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numUniqueQualifiers);
numUniqueTags = UVIntTool.getInt(buf, position);
position += UVIntTool.numBytes(numUniqueTags);
}
//TODO method that can read directly from ByteBuffer instead of InputStream
/*************** methods *************************/
public int getKeyValueTypeWidth() {
return allSameType ? 0 : 1;
}
public byte getIncludesMvccVersionByte() {
return includesMvccVersion ? (byte) 1 : (byte) 0;
}
public void setIncludesMvccVersion(byte includesMvccVersionByte) {
includesMvccVersion = includesMvccVersionByte != 0;
}
public byte getAllSameTypeByte() {
return allSameType ? (byte) 1 : (byte) 0;
}
public void setAllSameType(byte allSameTypeByte) {
allSameType = allSameTypeByte != 0;
}
public boolean isAllSameTimestamp() {
return timestampIndexWidth == 0;
}
public boolean isAllSameMvccVersion() {
return mvccVersionIndexWidth == 0;
}
public void setTimestampFields(LongEncoder encoder){
this.minTimestamp = encoder.getMin();
this.timestampIndexWidth = encoder.getBytesPerIndex();
this.timestampDeltaWidth = encoder.getBytesPerDelta();
this.numTimestampBytes = encoder.getTotalCompressedBytes();
}
public void setMvccVersionFields(LongEncoder encoder){
this.minMvccVersion = encoder.getMin();
this.mvccVersionIndexWidth = encoder.getBytesPerIndex();
this.mvccVersionDeltaWidth = encoder.getBytesPerDelta();
this.numMvccVersionBytes = encoder.getTotalCompressedBytes();
}
/*************** Object methods *************************/
/**
* Generated by Eclipse
*/
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj;
if (allSameType != other.allSameType)
return false;
if (allTypes != other.allTypes)
return false;
if (bufferOffset != other.bufferOffset)
return false;
if (valueLengthWidth != other.valueLengthWidth)
return false;
if (valueOffsetWidth != other.valueOffsetWidth)
return false;
if (familyOffsetWidth != other.familyOffsetWidth)
return false;
if (includesMvccVersion != other.includesMvccVersion)
return false;
if (maxQualifierLength != other.maxQualifierLength)
return false;
if (maxTagsLength != other.maxTagsLength)
return false;
if (maxRowLength != other.maxRowLength)
return false;
if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth)
return false;
if (mvccVersionIndexWidth != other.mvccVersionIndexWidth)
return false;
if (minMvccVersion != other.minMvccVersion)
return false;
if (minTimestamp != other.minTimestamp)
return false;
if (nextNodeOffsetWidth != other.nextNodeOffsetWidth)
return false;
if (numValueBytes != other.numValueBytes)
return false;
if (numFamilyBytes != other.numFamilyBytes)
return false;
if (numMvccVersionBytes != other.numMvccVersionBytes)
return false;
if (numMetaBytes != other.numMetaBytes)
return false;
if (numQualifierBytes != other.numQualifierBytes)
return false;
if (numTagsBytes != other.numTagsBytes)
return false;
if (numRowBytes != other.numRowBytes)
return false;
if (numTimestampBytes != other.numTimestampBytes)
return false;
if (numUniqueFamilies != other.numUniqueFamilies)
return false;
if (numUniqueQualifiers != other.numUniqueQualifiers)
return false;
if (numUniqueTags != other.numUniqueTags)
return false;
if (numUniqueRows != other.numUniqueRows)
return false;
if (numKeyValueBytes != other.numKeyValueBytes)
return false;
if (qualifierOffsetWidth != other.qualifierOffsetWidth)
return false;
if(tagsOffsetWidth != other.tagsOffsetWidth)
return false;
if (rowTreeDepth != other.rowTreeDepth)
return false;
if (timestampDeltaWidth != other.timestampDeltaWidth)
return false;
if (timestampIndexWidth != other.timestampIndexWidth)
return false;
if (version != other.version)
return false;
return true;
}
/**
* Generated by Eclipse
*/
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + (allSameType ? 1231 : 1237);
result = prime * result + allTypes;
result = prime * result + bufferOffset;
result = prime * result + valueLengthWidth;
result = prime * result + valueOffsetWidth;
result = prime * result + familyOffsetWidth;
result = prime * result + (includesMvccVersion ? 1231 : 1237);
result = prime * result + maxQualifierLength;
result = prime * result + maxTagsLength;
result = prime * result + maxRowLength;
result = prime * result + mvccVersionDeltaWidth;
result = prime * result + mvccVersionIndexWidth;
result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32));
result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32));
result = prime * result + nextNodeOffsetWidth;
result = prime * result + numValueBytes;
result = prime * result + numFamilyBytes;
result = prime * result + numMvccVersionBytes;
result = prime * result + numMetaBytes;
result = prime * result + numQualifierBytes;
result = prime * result + numTagsBytes;
result = prime * result + numRowBytes;
result = prime * result + numTimestampBytes;
result = prime * result + numUniqueFamilies;
result = prime * result + numUniqueQualifiers;
result = prime * result + numUniqueTags;
result = prime * result + numUniqueRows;
result = prime * result + numKeyValueBytes;
result = prime * result + qualifierOffsetWidth;
result = prime * result + tagsOffsetWidth;
result = prime * result + rowTreeDepth;
result = prime * result + timestampDeltaWidth;
result = prime * result + timestampIndexWidth;
result = prime * result + version;
return result;
}
/**
* Generated by Eclipse
*/
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("PtBlockMeta [bufferOffset=");
builder.append(bufferOffset);
builder.append(", version=");
builder.append(version);
builder.append(", numMetaBytes=");
builder.append(numMetaBytes);
builder.append(", numKeyValueBytes=");
builder.append(numKeyValueBytes);
builder.append(", includesMvccVersion=");
builder.append(includesMvccVersion);
builder.append(", numRowBytes=");
builder.append(numRowBytes);
builder.append(", numFamilyBytes=");
builder.append(numFamilyBytes);
builder.append(", numQualifierBytes=");
builder.append(numQualifierBytes);
builder.append(", numTimestampBytes=");
builder.append(numTimestampBytes);
builder.append(", numMvccVersionBytes=");
builder.append(numMvccVersionBytes);
builder.append(", numValueBytes=");
builder.append(numValueBytes);
builder.append(", numTagBytes=");
builder.append(numTagsBytes);
builder.append(", nextNodeOffsetWidth=");
builder.append(nextNodeOffsetWidth);
builder.append(", familyOffsetWidth=");
builder.append(familyOffsetWidth);
builder.append(", qualifierOffsetWidth=");
builder.append(qualifierOffsetWidth);
builder.append(", tagOffsetWidth=");
builder.append(tagsOffsetWidth);
builder.append(", timestampIndexWidth=");
builder.append(timestampIndexWidth);
builder.append(", mvccVersionIndexWidth=");
builder.append(mvccVersionIndexWidth);
builder.append(", valueOffsetWidth=");
builder.append(valueOffsetWidth);
builder.append(", valueLengthWidth=");
builder.append(valueLengthWidth);
builder.append(", rowTreeDepth=");
builder.append(rowTreeDepth);
builder.append(", maxRowLength=");
builder.append(maxRowLength);
builder.append(", maxQualifierLength=");
builder.append(maxQualifierLength);
builder.append(", maxTagLength=");
builder.append(maxTagsLength);
builder.append(", minTimestamp=");
builder.append(minTimestamp);
builder.append(", timestampDeltaWidth=");
builder.append(timestampDeltaWidth);
builder.append(", minMvccVersion=");
builder.append(minMvccVersion);
builder.append(", mvccVersionDeltaWidth=");
builder.append(mvccVersionDeltaWidth);
builder.append(", allSameType=");
builder.append(allSameType);
builder.append(", allTypes=");
builder.append(allTypes);
builder.append(", numUniqueRows=");
builder.append(numUniqueRows);
builder.append(", numUniqueFamilies=");
builder.append(numUniqueFamilies);
builder.append(", numUniqueQualifiers=");
builder.append(numUniqueQualifiers);
builder.append(", numUniqueTags=");
builder.append(numUniqueTags);
builder.append("]");
return builder.toString();
}
/************** absolute getters *******************/
public int getAbsoluteRowOffset() {
return getBufferOffset() + numMetaBytes;
}
public int getAbsoluteFamilyOffset() {
return getAbsoluteRowOffset() + numRowBytes;
}
public int getAbsoluteQualifierOffset() {
return getAbsoluteFamilyOffset() + numFamilyBytes;
}
public int getAbsoluteTagsOffset() {
return getAbsoluteQualifierOffset() + numQualifierBytes;
}
public int getAbsoluteTimestampOffset() {
return getAbsoluteTagsOffset() + numTagsBytes;
}
public int getAbsoluteMvccVersionOffset() {
return getAbsoluteTimestampOffset() + numTimestampBytes;
}
public int getAbsoluteValueOffset() {
return getAbsoluteMvccVersionOffset() + numMvccVersionBytes;
}
/*************** get/set ***************************/
public int getTimestampDeltaWidth() {
return timestampDeltaWidth;
}
public void setTimestampDeltaWidth(int timestampDeltaWidth) {
this.timestampDeltaWidth = timestampDeltaWidth;
}
public int getValueOffsetWidth() {
return valueOffsetWidth;
}
public int getTagsOffsetWidth() {
return tagsOffsetWidth;
}
public void setValueOffsetWidth(int dataOffsetWidth) {
this.valueOffsetWidth = dataOffsetWidth;
}
public void setTagsOffsetWidth(int dataOffsetWidth) {
this.tagsOffsetWidth = dataOffsetWidth;
}
public int getValueLengthWidth() {
return valueLengthWidth;
}
public void setValueLengthWidth(int dataLengthWidth) {
this.valueLengthWidth = dataLengthWidth;
}
public int getMaxRowLength() {
return maxRowLength;
}
public void setMaxRowLength(int maxRowLength) {
this.maxRowLength = maxRowLength;
}
public long getMinTimestamp() {
return minTimestamp;
}
public void setMinTimestamp(long minTimestamp) {
this.minTimestamp = minTimestamp;
}
public byte getAllTypes() {
return allTypes;
}
public void setAllTypes(byte allTypes) {
this.allTypes = allTypes;
}
public boolean isAllSameType() {
return allSameType;
}
public void setAllSameType(boolean allSameType) {
this.allSameType = allSameType;
}
public int getNextNodeOffsetWidth() {
return nextNodeOffsetWidth;
}
public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) {
this.nextNodeOffsetWidth = nextNodeOffsetWidth;
}
public int getNumRowBytes() {
return numRowBytes;
}
public void setNumRowBytes(int numRowBytes) {
this.numRowBytes = numRowBytes;
}
public int getNumTimestampBytes() {
return numTimestampBytes;
}
public void setNumTimestampBytes(int numTimestampBytes) {
this.numTimestampBytes = numTimestampBytes;
}
public int getNumValueBytes() {
return numValueBytes;
}
public int getNumTagsBytes() {
return numTagsBytes;
}
public void setNumTagsBytes(int numTagBytes){
this.numTagsBytes = numTagBytes;
}
public void setNumValueBytes(int numValueBytes) {
this.numValueBytes = numValueBytes;
}
public int getNumMetaBytes() {
return numMetaBytes;
}
public void setNumMetaBytes(int numMetaBytes) {
this.numMetaBytes = numMetaBytes;
}
public int getBufferOffset() {
return bufferOffset;
}
public void setBufferOffset(int bufferOffset) {
this.bufferOffset = bufferOffset;
}
public int getNumKeyValueBytes() {
return numKeyValueBytes;
}
public void setNumKeyValueBytes(int numKeyValueBytes) {
this.numKeyValueBytes = numKeyValueBytes;
}
public int getRowTreeDepth() {
return rowTreeDepth;
}
public void setRowTreeDepth(int rowTreeDepth) {
this.rowTreeDepth = rowTreeDepth;
}
public int getNumMvccVersionBytes() {
return numMvccVersionBytes;
}
public void setNumMvccVersionBytes(int numMvccVersionBytes) {
this.numMvccVersionBytes = numMvccVersionBytes;
}
public int getMvccVersionDeltaWidth() {
return mvccVersionDeltaWidth;
}
public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) {
this.mvccVersionDeltaWidth = mvccVersionDeltaWidth;
}
public long getMinMvccVersion() {
return minMvccVersion;
}
public void setMinMvccVersion(long minMvccVersion) {
this.minMvccVersion = minMvccVersion;
}
public int getNumFamilyBytes() {
return numFamilyBytes;
}
public void setNumFamilyBytes(int numFamilyBytes) {
this.numFamilyBytes = numFamilyBytes;
}
public int getFamilyOffsetWidth() {
return familyOffsetWidth;
}
public void setFamilyOffsetWidth(int familyOffsetWidth) {
this.familyOffsetWidth = familyOffsetWidth;
}
public int getNumUniqueRows() {
return numUniqueRows;
}
public void setNumUniqueRows(int numUniqueRows) {
this.numUniqueRows = numUniqueRows;
}
public int getNumUniqueFamilies() {
return numUniqueFamilies;
}
public void setNumUniqueFamilies(int numUniqueFamilies) {
this.numUniqueFamilies = numUniqueFamilies;
}
public int getNumUniqueQualifiers() {
return numUniqueQualifiers;
}
public void setNumUniqueQualifiers(int numUniqueQualifiers) {
this.numUniqueQualifiers = numUniqueQualifiers;
}
public void setNumUniqueTags(int numUniqueTags) {
this.numUniqueTags = numUniqueTags;
}
public int getNumUniqueTags() {
return numUniqueTags;
}
public int getNumQualifierBytes() {
return numQualifierBytes;
}
public void setNumQualifierBytes(int numQualifierBytes) {
this.numQualifierBytes = numQualifierBytes;
}
public int getQualifierOffsetWidth() {
return qualifierOffsetWidth;
}
public void setQualifierOffsetWidth(int qualifierOffsetWidth) {
this.qualifierOffsetWidth = qualifierOffsetWidth;
}
public int getMaxQualifierLength() {
return maxQualifierLength;
}
// TODO : decide on some max value for this ? INTEGER_MAX?
public void setMaxQualifierLength(int maxQualifierLength) {
this.maxQualifierLength = maxQualifierLength;
}
public int getMaxTagsLength() {
return this.maxTagsLength;
}
public void setMaxTagsLength(int maxTagLength) {
this.maxTagsLength = maxTagLength;
}
public int getTimestampIndexWidth() {
return timestampIndexWidth;
}
public void setTimestampIndexWidth(int timestampIndexWidth) {
this.timestampIndexWidth = timestampIndexWidth;
}
public int getMvccVersionIndexWidth() {
return mvccVersionIndexWidth;
}
public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) {
this.mvccVersionIndexWidth = mvccVersionIndexWidth;
}
public int getVersion() {
return version;
}
public void setVersion(int version) {
this.version = version;
}
public boolean isIncludesMvccVersion() {
return includesMvccVersion;
}
public void setIncludesMvccVersion(boolean includesMvccVersion) {
this.includesMvccVersion = includesMvccVersion;
}
}
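
As the class comment above says, nearly every field of PrefixTreeBlockMeta is persisted as a variable-width unsigned integer (UVIntTool/UVLongTool), so small counters usually cost only a byte or two. The sketch below shows a generic 7-bits-per-byte varint with a continuation bit to illustrate the idea; it is not a reproduction of UVIntTool's exact byte layout.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class VarIntSketch {
  // Write the low 7 bits per byte; set the high bit when more bytes follow.
  static void writeUVInt(int value, OutputStream os) throws IOException {
    while ((value & ~0x7F) != 0) {
      os.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    os.write(value);
  }

  // Accumulate 7 bits per byte until a byte with the high bit clear is seen.
  static int readUVInt(InputStream is) throws IOException {
    int result = 0;
    int shift = 0;
    int b;
    do {
      b = is.read();
      result |= (b & 0x7F) << shift;
      shift += 7;
    } while ((b & 0x80) != 0);
    return result;
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    writeUVInt(300, bos);                                  // encodes in two bytes
    int decoded = readUVInt(new ByteArrayInputStream(bos.toByteArray()));
    System.out.println(decoded);                           // prints 300
  }
}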

View File

@@ -1,216 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.CellComparatorImpl.MetaCellComparator;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.codec.prefixtree.encode.EncoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.EncodingState;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.io.WritableUtils;
/**
* <p>
* This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or
* package changes.
* </p>
* PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point
* for PrefixTree encoding and decoding. Encoding is delegated to instances of
* {@link PrefixTreeEncoder}, and decoding is delegated to instances of
* {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher}.
* Encoder and decoder instances are
* created and recycled by static PtEncoderFactory and PtDecoderFactory.
*/
@InterfaceAudience.Private
public class PrefixTreeCodec implements DataBlockEncoder {
/**
* no-arg constructor for reflection
*/
public PrefixTreeCodec() {
}
@Override
public ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
throws IOException {
return decodeKeyValues(source, 0, 0, decodingCtx);
}
/**
* I don't think this method is called during normal HBase operation, so efficiency is not
* important.
*/
public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
int skipLastBytes, HFileBlockDecodingContext decodingCtx) throws IOException {
ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste
sourceAsBuffer.mark();
PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(new SingleByteBuff(sourceAsBuffer));
sourceAsBuffer.rewind();
int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes();
byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader];
ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader);
result.rewind();
CellSearcher searcher = null;
try {
boolean includesMvcc = decodingCtx.getHFileContext().isIncludesMvcc();
searcher = DecoderFactory.checkOut(new SingleByteBuff(sourceAsBuffer), includesMvcc);
while (searcher.advance()) {
KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current());
// needs to be modified for DirectByteBuffers. no existing methods to
// write VLongs to byte[]
int offset = result.arrayOffset() + result.position();
System.arraycopy(currentCell.getBuffer(), currentCell.getOffset(), result.array(), offset,
currentCell.getLength());
int keyValueLength = KeyValueUtil.length(currentCell);
ByteBufferUtils.skip(result, keyValueLength);
offset += keyValueLength;
if (includesMvcc) {
ByteBufferUtils.writeVLong(result, currentCell.getSequenceId());
}
}
result.position(result.limit());//make it appear as if we were appending
return result;
} finally {
DecoderFactory.checkIn(searcher);
}
}
@Override
public Cell getFirstKeyCellInBlock(ByteBuff block) {
block.rewind();
PrefixTreeArraySearcher searcher = null;
try {
// should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will
searcher = DecoderFactory.checkOut(block, true);
if (!searcher.positionAtFirstCell()) {
return null;
}
return searcher.current();
} finally {
DecoderFactory.checkIn(searcher);
}
}
@Override
public HFileBlockEncodingContext newDataBlockEncodingContext(
DataBlockEncoding encoding, byte[] header, HFileContext meta) {
if(DataBlockEncoding.PREFIX_TREE != encoding){
//i'm not sure why encoding is in the interface. Each encoder implementation should probably
//know it's encoding type
throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported");
}
return new HFileBlockDefaultEncodingContext(encoding, header, meta);
}
@Override
public HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta) {
return new HFileBlockDefaultDecodingContext(meta);
}
/**
* Is this the correct handling of an illegal comparator? How to prevent that from getting all
* the way to this point.
*/
@Override
public EncodedSeeker createSeeker(CellComparator comparator,
HFileBlockDecodingContext decodingCtx) {
if (comparator instanceof MetaCellComparator) {
throw new IllegalArgumentException(
"DataBlockEncoding.PREFIX_TREE not compatible with hbase:meta " + "table");
}
return new PrefixTreeSeeker(decodingCtx.getHFileContext().isIncludesMvcc());
}
@Override
public int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
throws IOException {
PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState();
PrefixTreeEncoder builder = state.builder;
builder.write(cell);
int size = KeyValueUtil.length(cell);
if (encodingCtx.getHFileContext().isIncludesMvcc()) {
size += WritableUtils.getVIntSize(cell.getSequenceId());
}
return size;
}
private static class PrefixTreeEncodingState extends EncodingState {
PrefixTreeEncoder builder = null;
}
@Override
public void startBlockEncoding(HFileBlockEncodingContext blkEncodingCtx, DataOutputStream out)
throws IOException {
if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
throw new IOException(this.getClass().getName() + " only accepts "
+ HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context.");
}
HFileBlockDefaultEncodingContext encodingCtx =
(HFileBlockDefaultEncodingContext) blkEncodingCtx;
encodingCtx.prepareEncoding(out);
PrefixTreeEncoder builder = EncoderFactory.checkOut(out, encodingCtx.getHFileContext()
.isIncludesMvcc());
PrefixTreeEncodingState state = new PrefixTreeEncodingState();
state.builder = builder;
blkEncodingCtx.setEncodingState(state);
}
@Override
public void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
byte[] uncompressedBytesWithHeader) throws IOException {
PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState();
PrefixTreeEncoder builder = state.builder;
builder.flush();
EncoderFactory.checkIn(builder);
// do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE?
if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
encodingCtx.postEncoding(BlockType.ENCODED_DATA);
} else {
encodingCtx.postEncoding(BlockType.DATA);
}
}
}
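
As its Javadoc notes, PrefixTreeCodec was never referenced directly: the DataBlockEncoding enum carried its fully-qualified class name and instantiated it through the public no-arg constructor. The sketch below shows that register-by-class-name pattern in isolation; BlockEncoder and NoOpEncoder are made-up stand-ins, not HBase types.

public class ReflectiveCodecLookup {
  interface BlockEncoder {
    String name();
  }

  public static class NoOpEncoder implements BlockEncoder {
    @Override
    public String name() {
      return "NONE";
    }
  }

  // Instantiate an encoder from its class name, relying on a public no-arg
  // constructor, as PrefixTreeCodec provided for exactly this purpose.
  static BlockEncoder createEncoder(String className) {
    try {
      return (BlockEncoder) Class.forName(className).getDeclaredConstructor().newInstance();
    } catch (ReflectiveOperationException e) {
      throw new IllegalStateException("Cannot instantiate encoder " + className, e);
    }
  }

  public static void main(String[] args) {
    BlockEncoder encoder = createEncoder(NoOpEncoder.class.getName());
    System.out.println(encoder.name()); // prints NONE
  }
}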

View File

@@ -1,586 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.ByteBufferCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.SettableSequenceId;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
/**
* These methods have the same definition as any implementation of the EncodedSeeker.
*
* In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It
* currently returns a new KeyValue object each time getKeyValue is called. This is not horrible,
* but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in
* the data from the PrefixTreeCell. It is somewhat heavyweight right now.
*/
@InterfaceAudience.Private
public class PrefixTreeSeeker implements EncodedSeeker {
protected boolean includeMvccVersion;
protected PrefixTreeArraySearcher ptSearcher;
public PrefixTreeSeeker(boolean includeMvccVersion) {
this.includeMvccVersion = includeMvccVersion;
}
@Override
public void setCurrentBuffer(ByteBuff fullBlockBuffer) {
ptSearcher = DecoderFactory.checkOut(fullBlockBuffer, includeMvccVersion);
rewind();
}
/**
* <p>
* Currently unused.
* </p>
* TODO performance leak. should reuse the searchers. hbase does not currently have a hook where
* this can be called
*/
public void releaseCurrentSearcher(){
DecoderFactory.checkIn(ptSearcher);
}
@Override
public Cell getKey() {
return ptSearcher.current();
}
@Override
public ByteBuffer getValueShallowCopy() {
return PrivateCellUtil.getValueBufferShallowCopy(ptSearcher.current());
}
/**
* currently must do deep copy into new array
*/
@Override
public Cell getCell() {
// The PrefixTreecell is of type BytebufferedCell and the value part of the cell
// determines whether we are offheap cell or onheap cell. All other parts of the cell-
// row, fam and col are all represented as onheap byte[]
ByteBufferCell cell = (ByteBufferCell)ptSearcher.current();
if (cell == null) {
return null;
}
// Use the ByteBuffered cell to see if the Cell is onheap or offheap
if (cell.getValueByteBuffer().hasArray()) {
return new OnheapPrefixTreeCell(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
cell.getValueArray(), cell.getValueOffset(), cell.getValueLength(), cell.getTagsArray(),
cell.getTagsOffset(), cell.getTagsLength(), cell.getTimestamp(), cell.getTypeByte(),
cell.getSequenceId());
} else {
return new OffheapPrefixTreeCell(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
cell.getValueByteBuffer(), cell.getValuePosition(), cell.getValueLength(),
cell.getTagsArray(), cell.getTagsOffset(), cell.getTagsLength(), cell.getTimestamp(),
cell.getTypeByte(), cell.getSequenceId());
}
}
/**
* <p>
* Currently unused.
* </p><p>
* A nice, lightweight reference, though the underlying cell is transient. This method may return
* the same reference to the backing PrefixTreeCell repeatedly, while other implementations may
* return a different reference for each Cell.
* </p>
* The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to
* use this method instead of the getKeyValue() methods above.
*/
public Cell get() {
return ptSearcher.current();
}
@Override
public void rewind() {
ptSearcher.positionAtFirstCell();
}
@Override
public boolean next() {
return ptSearcher.advance();
}
public boolean advance() {
return ptSearcher.advance();
}
private static final boolean USE_POSITION_BEFORE = false;
/*
* Support both of these options since the underlying PrefixTree supports
* both. Possibly expand the EncodedSeeker to utilize them both.
*/
protected int seekToOrBeforeUsingPositionAtOrBefore(Cell kv, boolean seekBefore) {
// this does a deep copy of the key byte[] because the CellSearcher
// interface wants a Cell
CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv);
if (CellScannerPosition.AT == position) {
if (seekBefore) {
ptSearcher.previous();
return 1;
}
return 0;
}
return 1;
}
protected int seekToOrBeforeUsingPositionAtOrAfter(Cell kv, boolean seekBefore) {
// should probably switch this to use the seekForwardToOrBefore method
CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv);
if (CellScannerPosition.AT == position) {
if (seekBefore) {
ptSearcher.previous();
return 1;
}
return 0;
}
if (CellScannerPosition.AFTER == position) {
if (!ptSearcher.isBeforeFirst()) {
ptSearcher.previous();
}
return 1;
}
if (position == CellScannerPosition.AFTER_LAST) {
if (seekBefore) {
ptSearcher.previous();
}
return 1;
}
throw new RuntimeException("unexpected CellScannerPosition:" + position);
}
@Override
public int seekToKeyInBlock(Cell key, boolean forceBeforeOnExactMatch) {
if (USE_POSITION_BEFORE) {
return seekToOrBeforeUsingPositionAtOrBefore(key, forceBeforeOnExactMatch);
} else {
return seekToOrBeforeUsingPositionAtOrAfter(key, forceBeforeOnExactMatch);
}
}
@Override
public int compareKey(CellComparator comparator, Cell key) {
return comparator.compare(key,
ptSearcher.current());
}
/**
* Cloned version of the PrefixTreeCell where except the value part, the rest
* of the key part is deep copied
*
*/
private static class OnheapPrefixTreeCell implements Cell, SettableSequenceId, HeapSize {
private static final long FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT
+ (5 * ClassSize.REFERENCE) + (2 * Bytes.SIZEOF_LONG) + (4 * Bytes.SIZEOF_INT)
+ (Bytes.SIZEOF_SHORT) + (2 * Bytes.SIZEOF_BYTE) + (5 * ClassSize.ARRAY));
private byte[] row;
private short rowLength;
private byte[] fam;
private byte famLength;
private byte[] qual;
private int qualLength;
private byte[] val;
private int valOffset;
private int valLength;
private byte[] tag;
private int tagsLength;
private long ts;
private long seqId;
private byte type;
public OnheapPrefixTreeCell(byte[] row, int rowOffset, short rowLength, byte[] fam,
int famOffset, byte famLength, byte[] qual, int qualOffset, int qualLength, byte[] val,
int valOffset, int valLength, byte[] tag, int tagOffset, int tagLength, long ts, byte type,
long seqId) {
this.row = new byte[rowLength];
System.arraycopy(row, rowOffset, this.row, 0, rowLength);
this.rowLength = rowLength;
this.fam = new byte[famLength];
System.arraycopy(fam, famOffset, this.fam, 0, famLength);
this.famLength = famLength;
this.qual = new byte[qualLength];
System.arraycopy(qual, qualOffset, this.qual, 0, qualLength);
this.qualLength = qualLength;
this.tag = new byte[tagLength];
System.arraycopy(tag, tagOffset, this.tag, 0, tagLength);
this.tagsLength = tagLength;
this.val = val;
this.valLength = valLength;
this.valOffset = valOffset;
this.ts = ts;
this.seqId = seqId;
this.type = type;
}
@Override
public void setSequenceId(long seqId) {
this.seqId = seqId;
}
@Override
public byte[] getRowArray() {
return this.row;
}
@Override
public int getRowOffset() {
return 0;
}
@Override
public short getRowLength() {
return this.rowLength;
}
@Override
public byte[] getFamilyArray() {
return this.fam;
}
@Override
public int getFamilyOffset() {
return 0;
}
@Override
public byte getFamilyLength() {
return this.famLength;
}
@Override
public byte[] getQualifierArray() {
return this.qual;
}
@Override
public int getQualifierOffset() {
return 0;
}
@Override
public int getQualifierLength() {
return this.qualLength;
}
@Override
public long getTimestamp() {
return ts;
}
@Override
public byte getTypeByte() {
return type;
}
@Override
public long getSequenceId() {
return seqId;
}
@Override
public byte[] getValueArray() {
return val;
}
@Override
public int getValueOffset() {
return this.valOffset;
}
@Override
public int getValueLength() {
return this.valLength;
}
@Override
public byte[] getTagsArray() {
return this.tag;
}
@Override
public int getTagsOffset() {
return 0;
}
@Override
public int getTagsLength() {
return this.tagsLength;
}
@Override
public String toString() {
String row = Bytes.toStringBinary(getRowArray(), getRowOffset(), getRowLength());
String family = Bytes.toStringBinary(getFamilyArray(), getFamilyOffset(), getFamilyLength());
String qualifier = Bytes.toStringBinary(getQualifierArray(), getQualifierOffset(),
getQualifierLength());
String timestamp = String.valueOf((getTimestamp()));
return row + "/" + family + (family != null && family.length() > 0 ? ":" : "") + qualifier
+ "/" + timestamp + "/" + Type.codeToType(type);
}
@Override
public long heapSize() {
return FIXED_OVERHEAD + rowLength + famLength + qualLength + valLength + tagsLength;
}
}
private static class OffheapPrefixTreeCell extends ByteBufferCell implements Cell,
SettableSequenceId, HeapSize {
private static final long FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT
+ (5 * ClassSize.REFERENCE) + (2 * Bytes.SIZEOF_LONG) + (4 * Bytes.SIZEOF_INT)
+ (Bytes.SIZEOF_SHORT) + (2 * Bytes.SIZEOF_BYTE) + (5 * ClassSize.BYTE_BUFFER));
private ByteBuffer rowBuff;
private short rowLength;
private ByteBuffer famBuff;
private byte famLength;
private ByteBuffer qualBuff;
private int qualLength;
private ByteBuffer val;
private int valOffset;
private int valLength;
private ByteBuffer tagBuff;
private int tagsLength;
private long ts;
private long seqId;
private byte type;
public OffheapPrefixTreeCell(byte[] row, int rowOffset, short rowLength, byte[] fam,
int famOffset, byte famLength, byte[] qual, int qualOffset, int qualLength, ByteBuffer val,
int valOffset, int valLength, byte[] tag, int tagOffset, int tagLength, long ts, byte type,
long seqId) {
byte[] tmpRow = new byte[rowLength];
System.arraycopy(row, rowOffset, tmpRow, 0, rowLength);
this.rowBuff = ByteBuffer.wrap(tmpRow);
this.rowLength = rowLength;
byte[] tmpFam = new byte[famLength];
System.arraycopy(fam, famOffset, tmpFam, 0, famLength);
this.famBuff = ByteBuffer.wrap(tmpFam);
this.famLength = famLength;
byte[] tmpQual = new byte[qualLength];
System.arraycopy(qual, qualOffset, tmpQual, 0, qualLength);
this.qualBuff = ByteBuffer.wrap(tmpQual);
this.qualLength = qualLength;
byte[] tmpTag = new byte[tagLength];
System.arraycopy(tag, tagOffset, tmpTag, 0, tagLength);
this.tagBuff = ByteBuffer.wrap(tmpTag);
this.tagsLength = tagLength;
this.val = val;
this.valLength = valLength;
this.valOffset = valOffset;
this.ts = ts;
this.seqId = seqId;
this.type = type;
}
@Override
public void setSequenceId(long seqId) {
this.seqId = seqId;
}
@Override
public byte[] getRowArray() {
return this.rowBuff.array();
}
@Override
public int getRowOffset() {
return getRowPosition();
}
@Override
public short getRowLength() {
return this.rowLength;
}
@Override
public byte[] getFamilyArray() {
return this.famBuff.array();
}
@Override
public int getFamilyOffset() {
return getFamilyPosition();
}
@Override
public byte getFamilyLength() {
return this.famLength;
}
@Override
public byte[] getQualifierArray() {
return this.qualBuff.array();
}
@Override
public int getQualifierOffset() {
return getQualifierPosition();
}
@Override
public int getQualifierLength() {
return this.qualLength;
}
@Override
public long getTimestamp() {
return ts;
}
@Override
public byte getTypeByte() {
return type;
}
@Override
public long getSequenceId() {
return seqId;
}
@Override
public byte[] getValueArray() {
byte[] tmpVal = new byte[valLength];
ByteBufferUtils.copyFromBufferToArray(tmpVal, val, valOffset, 0, valLength);
return tmpVal;
}
@Override
public int getValueOffset() {
return 0;
}
@Override
public int getValueLength() {
return this.valLength;
}
@Override
public byte[] getTagsArray() {
return this.tagBuff.array();
}
@Override
public int getTagsOffset() {
return getTagsPosition();
}
@Override
public int getTagsLength() {
return this.tagsLength;
}
@Override
public ByteBuffer getRowByteBuffer() {
return this.rowBuff;
}
@Override
public int getRowPosition() {
return 0;
}
@Override
public ByteBuffer getFamilyByteBuffer() {
return this.famBuff;
}
@Override
public int getFamilyPosition() {
return 0;
}
@Override
public ByteBuffer getQualifierByteBuffer() {
return this.qualBuff;
}
@Override
public int getQualifierPosition() {
return 0;
}
@Override
public ByteBuffer getTagsByteBuffer() {
return this.tagBuff;
}
@Override
public int getTagsPosition() {
return 0;
}
@Override
public ByteBuffer getValueByteBuffer() {
return this.val;
}
@Override
public int getValuePosition() {
return this.valOffset;
}
@Override
public long heapSize() {
return FIXED_OVERHEAD;
}
@Override
public String toString() {
String row = Bytes.toStringBinary(getRowArray(), getRowOffset(), getRowLength());
String family = Bytes.toStringBinary(getFamilyArray(), getFamilyOffset(), getFamilyLength());
String qualifier = Bytes.toStringBinary(getQualifierArray(), getQualifierOffset(),
getQualifierLength());
String timestamp = String.valueOf((getTimestamp()));
return row + "/" + family + (family != null && family.length() > 0 ? ":" : "") + qualifier
+ "/" + timestamp + "/" + Type.codeToType(type);
}
}
}

View File

@@ -1,63 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
/**
* <p>
* Pools PrefixTreeArraySearcher objects. Each Searcher can consist of hundreds or thousands of
* objects and 1 is needed for each HFile during a Get operation. With tens of thousands of
* Gets/second, reusing these searchers may save a lot of young gen collections.
* </p>
* An alternative implementation would be a ByteBufferSearcherPool (not implemented yet).
*/
@InterfaceAudience.Private
public class ArraySearcherPool {
/**
* One decoder is needed for each storefile for each Get operation, so we may need hundreds at the
* same time. However, decoding is a CPU-bound activity, so this should be limited to something in
* the realm of the maximum reasonable number of active threads.
*/
private static final Integer MAX_POOL_SIZE = 1000;
protected Queue<PrefixTreeArraySearcher> pool = new LinkedBlockingQueue<>(MAX_POOL_SIZE);
public PrefixTreeArraySearcher checkOut(ByteBuff buffer, boolean includesMvccVersion) {
PrefixTreeArraySearcher searcher = pool.poll();//will return null if pool is empty
searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion);
return searcher;
}
public void checkIn(PrefixTreeArraySearcher searcher) {
searcher.releaseBlockReference();
pool.offer(searcher);
}
@Override
public String toString() {
return ("poolSize:" + pool.size());
}
}
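For orientation while reviewing the removal, here is a minimal caller sketch of the checkOut/checkIn life cycle described in the class comment above. It is not part of this commit: it assumes the removed module is still on the classpath and that encodedBlock already holds a prefix-tree encoded data block.
import org.apache.hadoop.hbase.codec.prefixtree.decode.ArraySearcherPool;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.nio.ByteBuff;
public class SearcherPoolSketch {
  private static final ArraySearcherPool POOL = new ArraySearcherPool();
  // Borrow a searcher for one encoded block, walk every cell, then return the searcher for reuse.
  static void scanBlock(ByteBuff encodedBlock) {
    PrefixTreeArraySearcher searcher = POOL.checkOut(encodedBlock, false /* includesMvccVersion */);
    try {
      if (searcher.positionAtFirstCell()) {
        do {
          // consume searcher.current() here
        } while (searcher.advance());
      }
    } finally {
      POOL.checkIn(searcher); // releases the block reference before pooling the searcher
    }
  }
}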

View File

@ -1,83 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.nio.ByteBuff;
/**
* Static wrapper class for the ArraySearcherPool.
*/
@InterfaceAudience.Private
public class DecoderFactory {
private static final ArraySearcherPool POOL = new ArraySearcherPool();
//TODO will need a PrefixTreeSearcher on top of CellSearcher
public static PrefixTreeArraySearcher checkOut(final ByteBuff buffer,
boolean includeMvccVersion) {
PrefixTreeArraySearcher searcher = POOL.checkOut(buffer,
includeMvccVersion);
return searcher;
}
public static void checkIn(CellSearcher pSearcher) {
if (pSearcher == null) {
return;
}
if (! (pSearcher instanceof PrefixTreeArraySearcher)) {
throw new IllegalArgumentException("Cannot return "+pSearcher.getClass()+" to "
+DecoderFactory.class);
}
PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher;
POOL.checkIn(searcher);
}
/**************************** helper ******************************/
public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuff buffer,
PrefixTreeArraySearcher searcher, boolean includeMvccVersion) {
if (searcher == null) {
PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer);
searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(),
blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength(),
blockMeta.getMaxTagsLength());
searcher.initOnBlock(blockMeta, buffer, includeMvccVersion);
return searcher;
}
PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta();
blockMeta.initOnBlock(buffer);
if (!searcher.areBuffersBigEnough()) {
int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(),
searcher.getMaxRowTreeStackNodes());
int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength());
int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(),
searcher.getQualifierBufferLength());
int tagBufferLength = Math.max(blockMeta.getMaxTagsLength(), searcher.getTagBufferLength());
searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength,
qualifierBufferLength, tagBufferLength);
}
//this is where we parse the BlockMeta
searcher.initOnBlock(blockMeta, buffer, includeMvccVersion);
return searcher;
}
}
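Likewise, a short illustrative sketch (not from this commit) of how the static entry points above were driven from the decoder side; block is assumed to be a prefix-tree encoded block supplied by the read path.
import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.nio.ByteBuff;
class DecoderFactorySketch {
  static void withSearcher(ByteBuff block, boolean includeMvccVersion) {
    CellSearcher searcher = DecoderFactory.checkOut(block, includeMvccVersion);
    try {
      // seek / scan with the searcher here
    } finally {
      DecoderFactory.checkIn(searcher); // tolerates null; rejects non-prefix-tree searchers
    }
  }
}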

View File

@ -1,145 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.ReversibleCellScanner;
/**
* Methods for going backwards through a PrefixTree block. This class is split out on its own to
* simplify the Scanner superclass and Searcher subclass.
*/
@InterfaceAudience.Private
public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements
ReversibleCellScanner {
/***************** construct ******************************/
public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
}
/***************** Object methods ***************************/
@Override
public boolean equals(Object obj) {
//trivial override to confirm intent (findbugs)
return super.equals(obj);
}
/***************** methods **********************************/
@Override
public boolean previous() {
if (afterLast) {
afterLast = false;
positionAtLastCell();
return true;
}
if (beforeFirst) {
return false;
}
if (isFirstCellInRow()) {
previousRowInternal();
if (beforeFirst) {
return false;
}
populateLastNonRowFields();
return true;
}
populatePreviousNonRowFields();
return true;
}
@Override
public boolean previousRow(boolean endOfRow) {
previousRowInternal();
if(beforeFirst){
return false;
}
if(endOfRow){
populateLastNonRowFields();
}else{
populateFirstNonRowFields();
}
return true;
}
private boolean previousRowInternal() {
if (beforeFirst) {
return false;
}
if (afterLast) {
positionAtLastRow();
return true;
}
if (currentRowNode.hasOccurrences()) {
discardCurrentRowNode(false);
if(currentRowNode==null){
return false;
}
}
while (!beforeFirst) {
if (isDirectlyAfterNub()) {//we are about to back up to the nub
currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf
nubCellsRemain = true;//this positions us on the nub
return true;
}
if (currentRowNode.hasPreviousFanNodes()) {
followPreviousFan();
descendToLastRowFromCurrentPosition();
} else {// keep going up the stack until we find previous fan positions
discardCurrentRowNode(false);
if(currentRowNode==null){
return false;
}
}
if (currentRowNode.hasOccurrences()) {// escape clause
currentRowNode.resetFanIndex();
return true;// found some values
}
}
return false;// went past the beginning
}
protected boolean isDirectlyAfterNub() {
return currentRowNode.isNub() && currentRowNode.getFanIndex()==0;
}
protected void positionAtLastRow() {
reInitFirstNode();
descendToLastRowFromCurrentPosition();
}
protected void descendToLastRowFromCurrentPosition() {
while (currentRowNode.hasChildren()) {
followLastFan();
}
}
protected void positionAtLastCell() {
positionAtLastRow();
populateLastNonRowFields();
}
}

View File

@ -1,528 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hadoop.hbase.codec.prefixtree.decode.row.RowNodeReader;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.nio.ByteBuff;
/**
* Extends PrefixTreeCell and manipulates its protected fields. Could alternatively contain a
* PrefixTreeCell and call get/set methods.
*
* This is an "Array" scanner, to distinguish it from a future "ByteBuffer" scanner. This
* implementation requires that the bytes be in a normal java byte[] for performance. The
* alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer
* without copying the whole buffer on-heap.
*/
@InterfaceAudience.Private
public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner {
/***************** fields ********************************/
protected PrefixTreeBlockMeta blockMeta;
protected boolean beforeFirst;
protected boolean afterLast;
protected RowNodeReader[] rowNodes;
protected int rowNodeStackIndex;
protected RowNodeReader currentRowNode;
protected ColumnReader familyReader;
protected ColumnReader qualifierReader;
protected ColumnReader tagsReader;
protected TimestampDecoder timestampDecoder;
protected MvccVersionDecoder mvccVersionDecoder;
protected boolean nubCellsRemain;
protected int currentCellIndex;
/*********************** construct ******************************/
// pass in blockMeta so we can initialize buffers big enough for all cells in the block
public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
this.rowNodes = new RowNodeReader[rowTreeDepth];
for (int i = 0; i < rowNodes.length; ++i) {
rowNodes[i] = new RowNodeReader();
}
this.rowBuffer = new byte[rowBufferLength];
this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH];
this.familyReader = new ColumnReader(familyBuffer, ColumnNodeType.FAMILY);
this.qualifierBuffer = new byte[qualifierBufferLength];
this.tagsBuffer = new byte[tagsBufferLength];
this.qualifierReader = new ColumnReader(qualifierBuffer, ColumnNodeType.QUALIFIER);
this.tagsReader = new ColumnReader(tagsBuffer, ColumnNodeType.TAGS);
this.timestampDecoder = new TimestampDecoder();
this.mvccVersionDecoder = new MvccVersionDecoder();
}
/**************** init helpers ***************************************/
/**
* Call when first accessing a block.
* @return false if the buffers are too small for this block, meaning the caller should create
*         an entirely new scanner
*/
public boolean areBuffersBigEnough() {
if (rowNodes.length < blockMeta.getRowTreeDepth()) {
return false;
}
if (rowBuffer.length < blockMeta.getMaxRowLength()) {
return false;
}
if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) {
return false;
}
if(tagsBuffer.length < blockMeta.getMaxTagsLength()) {
return false;
}
return true;
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block,
boolean includeMvccVersion) {
this.block = block;
this.blockMeta = blockMeta;
this.familyOffset = familyBuffer.length;
this.familyReader.initOnBlock(blockMeta, block);
this.qualifierOffset = qualifierBuffer.length;
this.qualifierReader.initOnBlock(blockMeta, block);
this.tagsOffset = tagsBuffer.length;
this.tagsReader.initOnBlock(blockMeta, block);
this.timestampDecoder.initOnBlock(blockMeta, block);
this.mvccVersionDecoder.initOnBlock(blockMeta, block);
this.includeMvccVersion = includeMvccVersion;
resetToBeforeFirstEntry();
}
// Does this have to be in the CellScanner Interface? TODO
public void resetToBeforeFirstEntry() {
beforeFirst = true;
afterLast = false;
rowNodeStackIndex = -1;
currentRowNode = null;
rowLength = 0;
familyOffset = familyBuffer.length;
familyLength = 0;
qualifierOffset = blockMeta.getMaxQualifierLength();
qualifierLength = 0;
nubCellsRemain = false;
currentCellIndex = -1;
timestamp = -1L;
type = DEFAULT_TYPE;
absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized
valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length
tagsOffset = blockMeta.getMaxTagsLength();
tagsLength = 0;
}
/**
* Call this before putting the scanner back into a pool so it doesn't hold the last used block
* in memory.
*/
public void releaseBlockReference(){
block = null;
}
/********************** CellScanner **********************/
@Override
public Cell current() {
if(isOutOfBounds()){
return null;
}
return (Cell)this;
}
/******************* Object methods ************************/
@Override
public boolean equals(Object obj) {
//trivial override to confirm intent (findbugs)
return super.equals(obj);
}
@Override
public int hashCode() {
return super.hashCode();
}
/**
* Override PrefixTreeCell.toString() with a check to see if the current cell is valid.
*/
@Override
public String toString() {
Cell currentCell = current();
if(currentCell==null){
return "null";
}
return ((PrefixTreeCell)currentCell).getKeyValueString();
}
/******************* advance ***************************/
public boolean positionAtFirstCell() {
reInitFirstNode();
return advance();
}
@Override
public boolean advance() {
if (afterLast) {
return false;
}
if (!hasOccurrences()) {
resetToBeforeFirstEntry();
}
if (beforeFirst || isLastCellInRow()) {
nextRow();
if (afterLast) {
return false;
}
} else {
++currentCellIndex;
}
populateNonRowFields(currentCellIndex);
return true;
}
public boolean nextRow() {
nextRowInternal();
if (afterLast) {
return false;
}
populateNonRowFields(currentCellIndex);
return true;
}
/**
* This method is safe to call when the scanner is not on a fully valid row node, as in the case
* of a row token miss in the Searcher
* @return true if we are positioned on a valid row, false if past end of block
*/
protected boolean nextRowInternal() {
if (afterLast) {
return false;
}
if (beforeFirst) {
initFirstNode();
if (currentRowNode.hasOccurrences()) {
if (currentRowNode.isNub()) {
nubCellsRemain = true;
}
currentCellIndex = 0;
return true;
}
}
if (currentRowNode.isLeaf()) {
discardCurrentRowNode(true);
}
while (!afterLast) {
if (nubCellsRemain) {
nubCellsRemain = false;
}
if (currentRowNode.hasMoreFanNodes()) {
followNextFan();
if (currentRowNode.hasOccurrences()) {
// found some values
currentCellIndex = 0;
return true;
}
} else {
discardCurrentRowNode(true);
}
}
return false;// went past the end
}
/**************** secondary traversal methods ******************************/
protected void reInitFirstNode() {
resetToBeforeFirstEntry();
initFirstNode();
}
protected void initFirstNode() {
int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset();
rowNodeStackIndex = 0;
currentRowNode = rowNodes[0];
currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure);
appendCurrentTokenToRowBuffer();
beforeFirst = false;
}
protected void followFirstFan() {
followFan(0);
}
protected void followPreviousFan() {
int nextFanPosition = currentRowNode.getFanIndex() - 1;
followFan(nextFanPosition);
}
protected void followCurrentFan() {
int currentFanPosition = currentRowNode.getFanIndex();
followFan(currentFanPosition);
}
protected void followNextFan() {
int nextFanPosition = currentRowNode.getFanIndex() + 1;
followFan(nextFanPosition);
}
protected void followLastFan() {
followFan(currentRowNode.getLastFanIndex());
}
protected void followFan(int fanIndex) {
currentRowNode.setFanIndex(fanIndex);
appendToRowBuffer(currentRowNode.getFanByte(fanIndex));
int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset()
+ currentRowNode.getNextNodeOffset(fanIndex, blockMeta);
++rowNodeStackIndex;
currentRowNode = rowNodes[rowNodeStackIndex];
currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure);
//TODO getToken is spewing garbage
appendCurrentTokenToRowBuffer();
if (currentRowNode.isNub()) {
nubCellsRemain = true;
}
currentCellIndex = 0;
}
/**
* @param forwards which marker to set if we overflow
*/
protected void discardCurrentRowNode(boolean forwards) {
RowNodeReader rowNodeBeingPopped = currentRowNode;
--rowNodeStackIndex;// pop it off the stack
if (rowNodeStackIndex < 0) {
currentRowNode = null;
if (forwards) {
markAfterLast();
} else {
markBeforeFirst();
}
return;
}
popFromRowBuffer(rowNodeBeingPopped);
currentRowNode = rowNodes[rowNodeStackIndex];
}
protected void markBeforeFirst() {
beforeFirst = true;
afterLast = false;
currentRowNode = null;
}
protected void markAfterLast() {
beforeFirst = false;
afterLast = true;
currentRowNode = null;
}
/***************** helper methods **************************/
protected void appendCurrentTokenToRowBuffer() {
block.get(currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength,
currentRowNode.getTokenLength());
rowLength += currentRowNode.getTokenLength();
}
protected void appendToRowBuffer(byte b) {
rowBuffer[rowLength] = b;
++rowLength;
}
protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) {
rowLength -= rowNodeBeingPopped.getTokenLength();
--rowLength; // pop the parent's fan byte
}
protected boolean hasOccurrences() {
return currentRowNode != null && currentRowNode.hasOccurrences();
}
protected boolean isBranch() {
return currentRowNode != null && !currentRowNode.hasOccurrences()
&& currentRowNode.hasChildren();
}
protected boolean isNub() {
return currentRowNode != null && currentRowNode.hasOccurrences()
&& currentRowNode.hasChildren();
}
protected boolean isLeaf() {
return currentRowNode != null && currentRowNode.hasOccurrences()
&& !currentRowNode.hasChildren();
}
//TODO expose this in a PrefixTreeScanner interface
public boolean isBeforeFirst(){
return beforeFirst;
}
public boolean isAfterLast(){
return afterLast;
}
protected boolean isOutOfBounds(){
return beforeFirst || afterLast;
}
protected boolean isFirstCellInRow() {
return currentCellIndex == 0;
}
protected boolean isLastCellInRow() {
return currentCellIndex == currentRowNode.getLastCellIndex();
}
/********************* fill in family/qualifier/ts/type/value ************/
protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) {
populateNonRowFields(cellNum);
return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, this, key);
}
protected void populateFirstNonRowFields() {
populateNonRowFields(0);
}
protected void populatePreviousNonRowFields() {
populateNonRowFields(currentCellIndex - 1);
}
protected void populateLastNonRowFields() {
populateNonRowFields(currentRowNode.getLastCellIndex());
}
protected void populateNonRowFields(int cellIndex) {
currentCellIndex = cellIndex;
populateFamily();
populateQualifier();
// Read tags only if there are tags in the meta
if(blockMeta.getNumTagsBytes() != 0) {
populateTag();
}
populateTimestamp();
populateMvccVersion();
populateType();
populateValueOffsets();
}
protected void populateFamily() {
int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta);
familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset();
familyLength = familyReader.getColumnLength();
}
protected void populateQualifier() {
int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta);
qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset();
qualifierLength = qualifierReader.getColumnLength();
}
protected void populateTag() {
int tagTreeIndex = currentRowNode.getTagOffset(currentCellIndex, blockMeta);
tagsOffset = tagsReader.populateBuffer(tagTreeIndex).getColumnOffset();
tagsLength = tagsReader.getColumnLength();
}
protected void populateTimestamp() {
if (blockMeta.isAllSameTimestamp()) {
timestamp = blockMeta.getMinTimestamp();
} else {
int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta);
timestamp = timestampDecoder.getLong(timestampIndex);
}
}
protected void populateMvccVersion() {
if (blockMeta.isAllSameMvccVersion()) {
mvccVersion = blockMeta.getMinMvccVersion();
} else {
int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex,
blockMeta);
mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex);
}
}
protected void populateType() {
int typeInt;
if (blockMeta.isAllSameType()) {
typeInt = blockMeta.getAllTypes();
} else {
typeInt = currentRowNode.getType(currentCellIndex, blockMeta);
}
type = PrefixTreeCell.TYPES[typeInt];
}
protected void populateValueOffsets() {
int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta);
absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection;
valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta);
this.block.asSubByteBuffer(this.absoluteValueOffset, valueLength, pair);
}
/**************** getters ***************************/
public PrefixTreeBlockMeta getBlockMeta() {
return blockMeta;
}
public int getMaxRowTreeStackNodes() {
return rowNodes.length;
}
public int getRowBufferLength() {
return rowBuffer.length;
}
public int getQualifierBufferLength() {
return qualifierBuffer.length;
}
public int getTagBufferLength() {
return tagsBuffer.length;
}
}
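A hedged sketch of the reuse contract implied by the init helpers above (it mirrors DecoderFactory.ensureArraySearcherValid, but for the plain scanner): re-initialize an existing instance when its buffers are big enough, otherwise build a larger one from the new block's meta. Variable names are illustrative only.
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArrayScanner;
import org.apache.hadoop.hbase.nio.ByteBuff;
class ScannerReuseSketch {
  // Re-init a pooled scanner on the next block, or replace it when its buffers are too small.
  static PrefixTreeArrayScanner reuse(PrefixTreeArrayScanner scanner, ByteBuff block,
      boolean includeMvccVersion) {
    PrefixTreeBlockMeta blockMeta = scanner.getBlockMeta();
    blockMeta.initOnBlock(block);
    if (!scanner.areBuffersBigEnough()) {
      scanner = new PrefixTreeArrayScanner(blockMeta, blockMeta.getRowTreeDepth(),
          blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength(),
          blockMeta.getMaxTagsLength());
    }
    scanner.initOnBlock(blockMeta, block, includeMvccVersion);
    return scanner;
  }
}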

View File

@ -1,418 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.shaded.com.google.common.primitives.UnsignedBytes;
/**
* <p>
* Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
* position itself on a requested Cell without scanning through cells before it. The PrefixTree is
* set up to be a Trie of rows, so finding a particular row is extremely cheap.
* </p>
* Once it finds the row, it does a binary search through the cells inside the row, which is not as
* fast as the trie search, but faster than iterating through every cell like existing block
* formats do. For this reason, this implementation is targeted towards schemas where rows are
* narrow enough to have several or many per block, and where you are generally looking for the
* entire row or the first cell. It will still be fast for wide rows or point queries, but could
* be improved upon.
*/
@InterfaceAudience.Private
public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
CellSearcher {
/*************** construct ******************************/
public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
}
/********************* CellSearcher methods *******************/
@Override
public boolean positionAt(Cell key) {
return CellScannerPosition.AT == positionAtOrAfter(key);
}
@Override
public CellScannerPosition positionAtOrBefore(Cell key) {
reInitFirstNode();
int fanIndex = -1;
while(true){
//detect row mismatch. break loop if mismatch
int currentNodeDepth = rowLength;
int rowTokenComparison = compareToCurrentToken(key);
if(rowTokenComparison != 0){
return fixRowTokenMissReverse(rowTokenComparison);
}
//exact row found, move on to qualifier & ts
if(rowMatchesAfterCurrentPosition(key)){
return positionAtQualifierTimestamp(key, true);
}
//detect dead end (no fan to descend into)
if(!currentRowNode.hasFan()){
if(hasOccurrences()){//must be leaf or nub
populateLastNonRowFields();
return CellScannerPosition.BEFORE;
}else{
//TODO i don't think this case is exercised by any tests
return fixRowFanMissReverse(0);
}
}
//keep hunting for the rest of the row
byte searchForByte = PrivateCellUtil.getRowByte(key, currentNodeDepth);
fanIndex = currentRowNode.whichFanNode(searchForByte);
if(fanIndex < 0){//no matching row. return early
int insertionPoint = -fanIndex - 1;
return fixRowFanMissReverse(insertionPoint);
}
//found a match, so dig deeper into the tree
followFan(fanIndex);
}
}
/**
* Identical workflow to positionAtOrBefore, but kept as a separate method to avoid ~10 extra
* if-statements. Priority on readability and debuggability.
*/
@Override
public CellScannerPosition positionAtOrAfter(Cell key) {
reInitFirstNode();
int fanIndex = -1;
while(true){
//detect row mismatch. break loop if mismatch
int currentNodeDepth = rowLength;
int rowTokenComparison = compareToCurrentToken(key);
if(rowTokenComparison != 0){
return fixRowTokenMissForward(rowTokenComparison);
}
//exact row found, move on to qualifier & ts
if(rowMatchesAfterCurrentPosition(key)){
return positionAtQualifierTimestamp(key, false);
}
//detect dead end (no fan to descend into)
if(!currentRowNode.hasFan()){
if(hasOccurrences()){
if (rowLength < key.getRowLength()) {
nextRow();
} else {
populateFirstNonRowFields();
}
return CellScannerPosition.AFTER;
}else{
//TODO i don't think this case is exercised by any tests
return fixRowFanMissForward(0);
}
}
//keep hunting for the rest of the row
byte searchForByte = PrivateCellUtil.getRowByte(key, currentNodeDepth);
fanIndex = currentRowNode.whichFanNode(searchForByte);
if(fanIndex < 0){//no matching row. return early
int insertionPoint = -fanIndex - 1;
return fixRowFanMissForward(insertionPoint);
}
//found a match, so dig deeper into the tree
followFan(fanIndex);
}
}
@Override
public boolean seekForwardTo(Cell key) {
if(currentPositionIsAfter(key)){
//our position is after the requested key, so can't do anything
return false;
}
return positionAt(key);
}
@Override
public CellScannerPosition seekForwardToOrBefore(Cell key) {
//Do we even need this check or should upper layers avoid this situation. It's relatively
//expensive compared to the rest of the seek operation.
if(currentPositionIsAfter(key)){
//our position is after the requested key, so can't do anything
return CellScannerPosition.AFTER;
}
return positionAtOrBefore(key);
}
@Override
public CellScannerPosition seekForwardToOrAfter(Cell key) {
//Do we even need this check or should upper layers avoid this situation. It's relatively
//expensive compared to the rest of the seek operation.
if(currentPositionIsAfter(key)){
//our position is after the requested key, so can't do anything
return CellScannerPosition.AFTER;
}
return positionAtOrAfter(key);
}
/**
* The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
*/
@Override
public void positionAfterLastCell() {
resetToBeforeFirstEntry();
beforeFirst = false;
afterLast = true;
}
/***************** Object methods ***************************/
@Override
public boolean equals(Object obj) {
//trivial override to confirm intent (findbugs)
return super.equals(obj);
}
/****************** internal methods ************************/
protected boolean currentPositionIsAfter(Cell cell){
return compareTo(cell) > 0;
}
protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
int minIndex = 0;
int maxIndex = currentRowNode.getLastCellIndex();
int diff;
while (true) {
int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
diff = populateNonRowFieldsAndCompareTo(midIndex, key);
if (diff == 0) {// found exact match
return CellScannerPosition.AT;
} else if (minIndex == maxIndex) {// even termination case
break;
} else if ((minIndex + 1) == maxIndex) {// odd termination case
diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
if(diff > 0){
diff = populateNonRowFieldsAndCompareTo(minIndex, key);
}
break;
} else if (diff < 0) {// keep going forward
minIndex = currentCellIndex;
} else {// went past it, back up
maxIndex = currentCellIndex;
}
}
if (diff == 0) {
return CellScannerPosition.AT;
} else if (diff < 0) {// we are before key
if (beforeOnMiss) {
return CellScannerPosition.BEFORE;
}
if (advance()) {
return CellScannerPosition.AFTER;
}
return CellScannerPosition.AFTER_LAST;
} else {// we are after key
if (!beforeOnMiss) {
return CellScannerPosition.AFTER;
}
if (previous()) {
return CellScannerPosition.BEFORE;
}
return CellScannerPosition.BEFORE_FIRST;
}
}
/**
* compare this.row to key.row but starting at the current rowLength
* @param key Cell being searched for
* @return true if row buffer contents match key.row
*/
protected boolean rowMatchesAfterCurrentPosition(Cell key) {
if (!currentRowNode.hasOccurrences()) {
return false;
}
int thatRowLength = key.getRowLength();
if (rowLength != thatRowLength) {
return false;
}
return true;
}
// TODO move part of this to Cell comparator?
/**
* Compare only the bytes within the window of the current token
* @param key the Cell being searched for
* @return a negative value if key is before this token, 0 if equal, and a positive value if key
*         is after
*/
protected int compareToCurrentToken(Cell key) {
int startIndex = rowLength - currentRowNode.getTokenLength();
int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
for (int i = startIndex; i < endIndexExclusive; ++i) {
if (i >= key.getRowLength()) {// key was shorter, so it's first
return -1;
}
byte keyByte = PrivateCellUtil.getRowByte(key, i);
byte thisByte = rowBuffer[i];
if (keyByte == thisByte) {
continue;
}
return UnsignedBytes.compare(keyByte, thisByte);
}
if (!currentRowNode.hasOccurrences() && rowLength >= key.getRowLength()) { // key was shorter
return -1;
}
return 0;
}
protected void followLastFansUntilExhausted(){
while(currentRowNode.hasFan()){
followLastFan();
}
}
/****************** complete seek when token mismatch ******************/
/**
* @param searcherIsAfterInputKey &lt;0: input key is before the searcher's position<br>
* &gt;0: input key is after the searcher's position
*/
protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
boolean foundPreviousRow = previousRow(true);
if(foundPreviousRow){
populateLastNonRowFields();
return CellScannerPosition.BEFORE;
}else{
return CellScannerPosition.BEFORE_FIRST;
}
}else{//searcher position is before the input key
if(currentRowNode.hasOccurrences()){
populateFirstNonRowFields();
return CellScannerPosition.BEFORE;
}
boolean foundNextRow = nextRow();
if(foundNextRow){
return CellScannerPosition.AFTER;
}else{
return CellScannerPosition.AFTER_LAST;
}
}
}
/**
* @param searcherIsAfterInputKey &lt;0: input key is before the searcher's position<br>
* &gt;0: input key is after the searcher's position
*/
protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
if(currentRowNode.hasOccurrences()){
populateFirstNonRowFields();
return CellScannerPosition.AFTER;
}
boolean foundNextRow = nextRow();
if(foundNextRow){
return CellScannerPosition.AFTER;
}else{
return CellScannerPosition.AFTER_LAST;
}
}else{//searcher position is before the input key, so go forward
discardCurrentRowNode(true);
boolean foundNextRow = nextRow();
if(foundNextRow){
return CellScannerPosition.AFTER;
}else{
return CellScannerPosition.AFTER_LAST;
}
}
}
/****************** complete seek when fan mismatch ******************/
protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
if(fanInsertionPoint == 0){//we need to back up a row
if (currentRowNode.hasOccurrences()) {
populateLastNonRowFields();
return CellScannerPosition.BEFORE;
}
boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
if(foundPreviousRow){
populateLastNonRowFields();
return CellScannerPosition.BEFORE;
}
return CellScannerPosition.BEFORE_FIRST;
}
//follow the previous fan, but then descend recursively forward
followFan(fanInsertionPoint - 1);
followLastFansUntilExhausted();
populateLastNonRowFields();
return CellScannerPosition.BEFORE;
}
protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
if(fanInsertionPoint >= currentRowNode.getFanOut()){
discardCurrentRowNode(true);
if (!nextRow()) {
return CellScannerPosition.AFTER_LAST;
} else {
return CellScannerPosition.AFTER;
}
}
followFan(fanInsertionPoint);
if(hasOccurrences()){
populateFirstNonRowFields();
return CellScannerPosition.AFTER;
}
if(nextRowInternal()){
populateFirstNonRowFields();
return CellScannerPosition.AFTER;
}else{
return CellScannerPosition.AFTER_LAST;
}
}
}
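For context, a brief caller sketch of the seek API above: position at the first cell greater than or equal to a key and report whether it was an exact hit. The searcher and key arguments are assumed to come from the pool and the read path; they are not defined in this commit.
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
class SeekSketch {
  static boolean seekAtOrAfter(PrefixTreeArraySearcher searcher, Cell key) {
    CellScannerPosition pos = searcher.positionAtOrAfter(key);
    if (pos == CellScannerPosition.AFTER_LAST) {
      return false; // every cell in the block sorts before the requested key
    }
    // pos is AT for an exact match, AFTER when positioned on the next cell past the key
    return pos == CellScannerPosition.AT;
  }
}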

View File

@ -1,311 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.ByteBufferCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.SettableSequenceId;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ObjectIntPair;
/**
* As the PrefixTreeArrayScanner moves through the tree bytes, it changes the
* values in the fields of this class so that Cell logic can be applied, but
* without allocating new memory for every Cell iterated through.
*/
@InterfaceAudience.Private
public class PrefixTreeCell extends ByteBufferCell implements SettableSequenceId,
Comparable<Cell> {
// Create a reference here? Can be removed too
protected CellComparator comparator = CellComparatorImpl.COMPARATOR;
/********************** static **********************/
public static final KeyValue.Type[] TYPES = new KeyValue.Type[256];
static {
for (KeyValue.Type type : KeyValue.Type.values()) {
TYPES[type.getCode() & 0xff] = type;
}
}
// Same as KeyValue constructor. Only used to avoid NPE's when full cell
// hasn't been initialized.
public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put;
/******************** fields ************************/
protected ByteBuff block;
// we could also avoid setting the mvccVersion in the scanner/searcher, but
// this is simpler
protected boolean includeMvccVersion;
protected byte[] rowBuffer;
protected int rowLength;
protected byte[] familyBuffer;
protected int familyOffset;
protected int familyLength;
protected byte[] qualifierBuffer;// aligned to the end of the array
protected int qualifierOffset;
protected int qualifierLength;
protected Long timestamp;
protected Long mvccVersion;
protected KeyValue.Type type;
protected int absoluteValueOffset;
protected int valueLength;
protected byte[] tagsBuffer;
protected int tagsOffset;
protected int tagsLength;
// Pair to set the value ByteBuffer and its offset
protected ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();
/********************** Cell methods ******************/
/**
* For debugging. Currently creates new KeyValue to utilize its toString()
* method.
*/
@Override
public String toString() {
return getKeyValueString();
}
@Override
public boolean equals(Object obj) {
if (!(obj instanceof Cell)) {
return false;
}
// Temporary hack to maintain backwards compatibility with KeyValue.equals
return PrivateCellUtil.equalsIgnoreMvccVersion(this, (Cell) obj);
// TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907
}
@Override
public int hashCode() {
return calculateHashForKey(this);
}
private int calculateHashForKey(Cell cell) {
// pre-calculate the 3 hashes made of byte ranges
int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(),
cell.getFamilyLength());
int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(),
cell.getQualifierLength());
// combine the sub-hashes with the timestamp and type
int hash = 31 * rowHash + familyHash;
hash = 31 * hash + qualifierHash;
hash = 31 * hash + (int) cell.getTimestamp();
hash = 31 * hash + cell.getTypeByte();
return hash;
}
@Override
public int compareTo(Cell other) {
return comparator.compare(this, other);
}
@Override
public long getTimestamp() {
return timestamp;
}
@Override
public long getSequenceId() {
if (!includeMvccVersion) {
return 0L;
}
return mvccVersion;
}
@Override
public int getValueLength() {
return valueLength;
}
@Override
public byte[] getRowArray() {
return rowBuffer;
}
@Override
public int getRowOffset() {
return 0;
}
@Override
public short getRowLength() {
return (short) rowLength;
}
@Override
public byte[] getFamilyArray() {
return familyBuffer;
}
@Override
public int getFamilyOffset() {
return familyOffset;
}
@Override
public byte getFamilyLength() {
return (byte) familyLength;
}
@Override
public byte[] getQualifierArray() {
return qualifierBuffer;
}
@Override
public int getQualifierOffset() {
return qualifierOffset;
}
@Override
public int getQualifierLength() {
return qualifierLength;
}
@Override
public byte[] getValueArray() {
if (this.pair.getFirst().hasArray()) {
return this.pair.getFirst().array();
} else {
// Just in case getValueArray is called on offheap BB
byte[] val = new byte[valueLength];
ByteBufferUtils.copyFromBufferToArray(val, this.pair.getFirst(), this.pair.getSecond(), 0,
valueLength);
return val;
}
}
@Override
public int getValueOffset() {
if (this.pair.getFirst().hasArray()) {
return this.pair.getSecond() + this.pair.getFirst().arrayOffset();
} else {
return 0;
}
}
@Override
public byte getTypeByte() {
return type.getCode();
}
/************************* helper methods *************************/
/**
* Need this separate method so we can call it from subclasses' toString()
* methods
*/
protected String getKeyValueString() {
KeyValue kv = KeyValueUtil.copyToNewKeyValue(this);
return kv.toString();
}
@Override
public int getTagsOffset() {
return tagsOffset;
}
@Override
public int getTagsLength() {
return tagsLength;
}
@Override
public byte[] getTagsArray() {
return this.tagsBuffer;
}
@Override
public void setSequenceId(long seqId) {
mvccVersion = seqId;
}
@Override
public ByteBuffer getRowByteBuffer() {
return ByteBuffer.wrap(rowBuffer);
}
@Override
public int getRowPosition() {
return 0;
}
@Override
public ByteBuffer getFamilyByteBuffer() {
return ByteBuffer.wrap(familyBuffer);
}
@Override
public int getFamilyPosition() {
return getFamilyOffset();
}
@Override
public ByteBuffer getQualifierByteBuffer() {
return ByteBuffer.wrap(qualifierBuffer);
}
@Override
public int getQualifierPosition() {
return getQualifierOffset();
}
@Override
public ByteBuffer getValueByteBuffer() {
return pair.getFirst();
}
@Override
public int getValuePosition() {
return pair.getSecond();
}
@Override
public ByteBuffer getTagsByteBuffer() {
return ByteBuffer.wrap(tagsBuffer);
}
@Override
public int getTagsPosition() {
return getTagsOffset();
}
}
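One usage note, offered as a hedged sketch rather than anything in this commit: because this class is a flyweight whose buffers are rewritten as the enclosing scanner advances, a caller that needed to hold a cell past the next advance() typically copied it out first, e.g. via KeyValueUtil as the class itself does in getKeyValueString().
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
class CopyCellSketch {
  // Snapshot the current flyweight cell into an independent, immutable KeyValue.
  static KeyValue snapshot(Cell current) {
    return KeyValueUtil.copyToNewKeyValue(current);
  }
}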

View File

@ -1,109 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode.column;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
@InterfaceAudience.Private
public class ColumnNodeReader {
/**************** fields ************************/
protected PrefixTreeBlockMeta blockMeta;
protected ByteBuff block;
protected ColumnNodeType nodeType;
protected byte[] columnBuffer;
protected int offsetIntoBlock;
protected int tokenOffsetIntoBlock;
protected int tokenLength;
protected int parentStartPosition;
/************** construct *************************/
public ColumnNodeReader(byte[] columnBuffer, ColumnNodeType nodeType) {
this.columnBuffer = columnBuffer;
this.nodeType = nodeType;
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
this.blockMeta = blockMeta;
this.block = block;
}
/************* methods *****************************/
public void positionAt(int offsetIntoBlock) {
this.offsetIntoBlock = offsetIntoBlock;
tokenLength = UVIntTool.getInt(block, offsetIntoBlock);
tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength;
int offsetWidth;
if(nodeType == ColumnNodeType.FAMILY) {
offsetWidth = blockMeta.getFamilyOffsetWidth();
} else if(nodeType == ColumnNodeType.QUALIFIER) {
offsetWidth = blockMeta.getQualifierOffsetWidth();
} else {
offsetWidth = blockMeta.getTagsOffsetWidth();
}
parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth);
}
public void prependTokenToBuffer(int bufferStartIndex) {
block.get(tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength);
}
public boolean isRoot() {
if (nodeType == ColumnNodeType.FAMILY) {
return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset();
} else if (nodeType == ColumnNodeType.QUALIFIER) {
return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset();
} else {
return offsetIntoBlock == blockMeta.getAbsoluteTagsOffset();
}
}
/************** standard methods *********************/
@Override
public String toString() {
return super.toString() + "[" + offsetIntoBlock + "]";
}
/****************** get/set ****************************/
public int getTokenLength() {
return tokenLength;
}
public int getParentStartPosition() {
return parentStartPosition;
}
}

View File

@ -1,108 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode.column;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.nio.ByteBuff;
/**
* Position one of these appropriately in the data block and you can call its methods to retrieve
* the family or qualifier at the current position.
*/
@InterfaceAudience.Private
public class ColumnReader {
/****************** fields *************************/
protected PrefixTreeBlockMeta blockMeta;
protected byte[] columnBuffer;
protected int columnOffset;
protected int columnLength;
protected ColumnNodeType nodeType;
protected ColumnNodeReader columnNodeReader;
/******************** construct *******************/
public ColumnReader(byte[] columnBuffer, ColumnNodeType nodeType) {
this.columnBuffer = columnBuffer;
this.nodeType = nodeType;
this.columnNodeReader = new ColumnNodeReader(columnBuffer, nodeType);
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
this.blockMeta = blockMeta;
clearColumnBuffer();
columnNodeReader.initOnBlock(blockMeta, block);
}
/********************* methods *******************/
public ColumnReader populateBuffer(int offsetIntoColumnData) {
clearColumnBuffer();
int nextRelativeOffset = offsetIntoColumnData;
while (true) {
int absoluteOffset = 0;
if (nodeType == ColumnNodeType.FAMILY) {
absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset;
} else if (nodeType == ColumnNodeType.QUALIFIER) {
absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset;
} else {
absoluteOffset = blockMeta.getAbsoluteTagsOffset() + nextRelativeOffset;
}
columnNodeReader.positionAt(absoluteOffset);
columnOffset -= columnNodeReader.getTokenLength();
columnLength += columnNodeReader.getTokenLength();
columnNodeReader.prependTokenToBuffer(columnOffset);
if (columnNodeReader.isRoot()) {
return this;
}
nextRelativeOffset = columnNodeReader.getParentStartPosition();
}
}
public byte[] copyBufferToNewArray() {// for testing
byte[] out = new byte[columnLength];
System.arraycopy(columnBuffer, columnOffset, out, 0, out.length);
return out;
}
public int getColumnLength() {
return columnLength;
}
public void clearColumnBuffer() {
columnOffset = columnBuffer.length;
columnLength = 0;
}
/****************************** get/set *************************************/
public int getColumnOffset() {
return columnOffset;
}
}
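For illustration only, roughly how a qualifier is materialized from the column trie with the reader above. Assumptions: blockMeta and block describe one encoded block, and qualifierOffsetFromRowNode is the per-cell offset handed out by RowNodeReader.getColumnOffset; none of these are defined by this commit.
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.nio.ByteBuff;
class ColumnReadSketch {
  static byte[] readQualifier(PrefixTreeBlockMeta blockMeta, ByteBuff block,
      int qualifierOffsetFromRowNode) {
    // The buffer must be as long as the longest qualifier in the block; tokens are prepended
    // right-to-left as the reader climbs from the referenced node up to the trie root.
    ColumnReader reader = new ColumnReader(new byte[blockMeta.getMaxQualifierLength()],
        ColumnNodeType.QUALIFIER);
    reader.initOnBlock(blockMeta, block);
    reader.populateBuffer(qualifierOffsetFromRowNode);
    return reader.copyBufferToNewArray();
  }
}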

View File

@ -1,281 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode.row;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
/**
* Position one of these appropriately in the data block and you can call its methods to retrieve
* information necessary to decode the cells in the row.
*/
@InterfaceAudience.Private
public class RowNodeReader {
/************* fields ***********************************/
protected ByteBuff block;
protected int offset;
protected int fanIndex;
protected int numCells;
protected int tokenOffset;
protected int tokenLength;
protected int fanOffset;
protected int fanOut;
protected int familyOffsetsOffset;
protected int qualifierOffsetsOffset;
protected int timestampIndexesOffset;
protected int mvccVersionIndexesOffset;
protected int operationTypesOffset;
protected int valueOffsetsOffset;
protected int valueLengthsOffset;
protected int tagOffsetsOffset;
protected int nextNodeOffsetsOffset;
/******************* construct **************************/
public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block, int offset) {
this.block = block;
this.offset = offset;
resetFanIndex();
this.tokenLength = UVIntTool.getInt(block, offset);
this.tokenOffset = offset + UVIntTool.numBytes(tokenLength);
this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength);
this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut);
this.numCells = UVIntTool.getInt(block, fanOffset + fanOut);
this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells);
this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth();
this.tagOffsetsOffset = this.qualifierOffsetsOffset + numCells * blockMeta.getQualifierOffsetWidth();
// TODO: This code may not be needed now, as we always consider tags to be present
if(blockMeta.getTagsOffsetWidth() == 0) {
// Make both of them same so that we know that there are no tags
this.tagOffsetsOffset = this.qualifierOffsetsOffset;
this.timestampIndexesOffset = qualifierOffsetsOffset + numCells * blockMeta.getQualifierOffsetWidth();
} else {
this.timestampIndexesOffset = tagOffsetsOffset + numCells * blockMeta.getTagsOffsetWidth();
}
this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells
* blockMeta.getTimestampIndexWidth();
this.operationTypesOffset = mvccVersionIndexesOffset + numCells
* blockMeta.getMvccVersionIndexWidth();
this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth();
this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth();
this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth();
}
/******************** methods ****************************/
public boolean isLeaf() {
return fanOut == 0;
}
public boolean isNub() {
return fanOut > 0 && numCells > 0;
}
public boolean isBranch() {
return fanOut > 0 && numCells == 0;
}
public boolean hasOccurrences() {
return numCells > 0;
}
public int getTokenArrayOffset(){
return tokenOffset;
}
public int getTokenLength() {
return tokenLength;
}
public byte getFanByte(int i) {
return block.get(fanOffset + i);
}
/**
* for debugging
*/
protected String getFanByteReadable(int i){
return ByteBuff.toStringBinary(block, fanOffset + i, 1);
}
public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getFamilyOffsetWidth();
int startIndex = familyOffsetsOffset + fIntWidth * index;
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getQualifierOffsetWidth();
int startIndex = qualifierOffsetsOffset + fIntWidth * index;
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
public int getTagOffset(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getTagsOffsetWidth();
int startIndex = tagOffsetsOffset + fIntWidth * index;
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getTimestampIndexWidth();
int startIndex = timestampIndexesOffset + fIntWidth * index;
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getMvccVersionIndexWidth();
int startIndex = mvccVersionIndexesOffset + fIntWidth * index;
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
public int getType(int index, PrefixTreeBlockMeta blockMeta) {
if (blockMeta.isAllSameType()) {
return blockMeta.getAllTypes();
}
return block.get(operationTypesOffset + index);
}
public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getValueOffsetWidth();
int startIndex = valueOffsetsOffset + fIntWidth * index;
int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
return offset;
}
public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getValueLengthWidth();
int startIndex = valueLengthsOffset + fIntWidth * index;
int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
return length;
}
public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getNextNodeOffsetWidth();
int startIndex = nextNodeOffsetsOffset + fIntWidth * index;
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
public String getBranchNubLeafIndicator() {
if (isNub()) {
return "N";
}
return isBranch() ? "B" : "L";
}
public boolean hasChildren() {
return fanOut > 0;
}
public int getLastFanIndex() {
return fanOut - 1;
}
public int getLastCellIndex() {
return numCells - 1;
}
public int getNumCells() {
return numCells;
}
public int getFanOut() {
return fanOut;
}
public byte[] getToken() {
byte[] newToken = new byte[tokenLength];
block.get(tokenOffset, newToken, 0, tokenLength);
return newToken;
}
public int getOffset() {
return offset;
}
public int whichFanNode(byte searchForByte) {
if( ! hasFan()){
throw new IllegalStateException("This row node has no fan, so can't search it");
}
int fanIndexInBlock = ByteBuff.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut,
searchForByte);
if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block
return fanIndexInBlock - fanOffset;
}
return fanIndexInBlock + fanOffset;// didn't find it, so compensate in reverse
}
public void resetFanIndex() {
fanIndex = -1;// just the way the logic currently works
}
public int getFanIndex() {
return fanIndex;
}
public void setFanIndex(int fanIndex) {
this.fanIndex = fanIndex;
}
public boolean hasFan(){
return fanOut > 0;
}
public boolean hasPreviousFanNodes() {
return fanOut > 0 && fanIndex > 0;
}
public boolean hasMoreFanNodes() {
return fanIndex < getLastFanIndex();
}
public boolean isOnLastFanNode() {
return !hasMoreFanNodes();
}
/*************** standard methods **************************/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("fan:" + ByteBuff.toStringBinary(block, fanOffset, fanOut));
sb.append(",token:" + ByteBuff.toStringBinary(block, tokenOffset, tokenLength));
sb.append(",numCells:" + numCells);
sb.append(",fanIndex:"+fanIndex);
if(fanIndex>=0){
sb.append("("+getFanByteReadable(fanIndex)+")");
}
return sb.toString();
}
}

View File

@ -1,58 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
/**
* Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block.
*/
@InterfaceAudience.Private
public class MvccVersionDecoder {
protected PrefixTreeBlockMeta blockMeta;
protected ByteBuff block;
/************** construct ***********************/
public MvccVersionDecoder() {
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
this.block = block;
this.blockMeta = blockMeta;
}
/************** methods *************************/
public long getMvccVersion(int index) {
if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical
return blockMeta.getMinMvccVersion();
}
int startIndex = blockMeta.getAbsoluteMvccVersionOffset()
+ blockMeta.getMvccVersionDeltaWidth() * index;
long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth());
return blockMeta.getMinMvccVersion() + delta;
}
}

View File

@ -1,58 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
/**
* Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block.
*/
@InterfaceAudience.Private
public class TimestampDecoder {
protected PrefixTreeBlockMeta blockMeta;
protected ByteBuff block;
/************** construct ***********************/
public TimestampDecoder() {
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
this.block = block;
this.blockMeta = blockMeta;
}
/************** methods *************************/
public long getLong(int index) {
if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical
return blockMeta.getMinTimestamp();
}
int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth()
* index;
long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth());
return blockMeta.getMinTimestamp() + delta;
}
}
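
Both decoders above resolve the i-th value as the block's minimum plus a fixed-width delta read at index * deltaWidth. Below is a minimal standalone sketch of that scheme, using a plain byte[] and hand-rolled big-endian reads in place of ByteBuff and UFIntTool; all names and numbers are illustrative, not part of the removed module.

import java.util.Arrays;

// Minimal sketch of the min-plus-delta scheme used by MvccVersionDecoder/TimestampDecoder.
public class DeltaDecodeSketch {
  public static void main(String[] args) {
    long[] timestamps = {1500000000000L, 1500000000123L, 1500000000456L};
    long min = Arrays.stream(timestamps).min().getAsLong();
    int deltaWidth = 2; // max delta here is 456, which needs 2 bytes

    // encode: one fixed-width big-endian delta per value
    byte[] block = new byte[timestamps.length * deltaWidth];
    for (int i = 0; i < timestamps.length; i++) {
      long delta = timestamps[i] - min;
      for (int b = 0; b < deltaWidth; b++) {
        block[i * deltaWidth + b] = (byte) (delta >>> (8 * (deltaWidth - 1 - b)));
      }
    }

    // decode the i-th value: min + fixed-width delta at offset i * deltaWidth
    int index = 2;
    long delta = 0;
    for (int b = 0; b < deltaWidth; b++) {
      delta = (delta << 8) | (block[index * deltaWidth + b] & 0xff);
    }
    System.out.println(min + delta); // 1500000000456
  }
}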

View File

@ -1,56 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode;
import java.io.OutputStream;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Retrieve PrefixTreeEncoders from this factory, which pools them and prepares each one
* retrieved from the pool for use.
*/
@InterfaceAudience.Private
public class EncoderFactory {
private static final EncoderPool POOL = new EncoderPoolImpl();
public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
return POOL.checkOut(outputStream, includeMvccVersion);
}
public static void checkIn(PrefixTreeEncoder encoder) {
POOL.checkIn(encoder);
}
/**************************** helper ******************************/
protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder,
OutputStream outputStream, boolean includeMvccVersion) {
PrefixTreeEncoder ret = encoder;
if (encoder == null) {
ret = new PrefixTreeEncoder(outputStream, includeMvccVersion);
}
ret.reset(outputStream, includeMvccVersion);
return ret;
}
}
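
A hedged sketch of the check-out/check-in cycle this factory supports: an encoder is borrowed for one block, fed sorted cells, flushed, and always returned to the pool. The encodeBlock helper and the sortedCells iterable are hypothetical; ByteArrayOutputStream stands in for whatever stream the block writer actually supplies.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.codec.prefixtree.encode.EncoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;

static byte[] encodeBlock(Iterable<Cell> sortedCells) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  PrefixTreeEncoder encoder = EncoderFactory.checkOut(out, true /* includeMvccVersion */);
  try {
    for (Cell cell : sortedCells) {
      encoder.write(cell);           // cells must arrive in sorted order
    }
    encoder.flush();                 // compiles the block and writes it to out
  } finally {
    EncoderFactory.checkIn(encoder); // always return the encoder to the pool
  }
  return out.toByteArray();
}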

View File

@ -1,32 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode;
import java.io.OutputStream;
import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
public interface EncoderPool {
PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion);
void checkIn(PrefixTreeEncoder encoder);
}

View File

@ -1,46 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode;
import java.io.OutputStream;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
public class EncoderPoolImpl implements EncoderPool {
private BlockingQueue<PrefixTreeEncoder> unusedEncoders = new LinkedBlockingQueue<>();
@Override
public PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
PrefixTreeEncoder encoder = unusedEncoders.poll();
if (encoder == null) {
encoder = new PrefixTreeEncoder(outputStream, includeMvccVersion);
} else {
encoder.reset(outputStream, includeMvccVersion);
}
return encoder;
}
@Override
public void checkIn(PrefixTreeEncoder encoder) {
this.unusedEncoders.add(encoder);
}
}

View File

@ -1,542 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowSectionWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.io.CellOutputStream;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.io.WritableUtils;
/**
* This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are
* added they are completely copied into the various encoding structures. This is important because
* usually the cells being fed in during compactions will be transient.<br>
* <br>
* Usage:<br>
* 1) constructor<br>
* 2) append cells in sorted order: write(Cell cell)<br>
* 3) flush()<br>
*/
@InterfaceAudience.Private
public class PrefixTreeEncoder implements CellOutputStream {
/**************** static ************************/
protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class);
//future-proof where HBase supports multiple families in a data block.
public static final boolean MULITPLE_FAMILIES_POSSIBLE = false;
private static final boolean USE_HASH_COLUMN_SORTER = true;
private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256;
private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024;
/**************** fields *************************/
protected long numResets = 0L;
protected OutputStream outputStream;
/*
* Cannot change during a single block's encoding. If false, then substitute incoming Cell's
* mvccVersion with zero and write out the block as usual.
*/
protected boolean includeMvccVersion;
/*
* reusable ByteRanges used for communicating with the sorters/compilers
*/
protected ByteRange rowRange;
protected ByteRange familyRange;
protected ByteRange qualifierRange;
protected ByteRange tagsRange;
/*
* incoming Cell fields are copied into these arrays
*/
protected long[] timestamps;
protected long[] mvccVersions;
protected byte[] typeBytes;
protected int[] valueOffsets;
protected int[] tagsOffsets;
protected byte[] values;
protected byte[] tags;
protected PrefixTreeBlockMeta blockMeta;
/*
* Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and
* compile before flushing.
*/
protected LongEncoder timestampEncoder;
protected LongEncoder mvccVersionEncoder;
protected CellTypeEncoder cellTypeEncoder;
/*
* Structures used for collecting families and qualifiers, de-duplicating them, and sorting them
* so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by
* comparing only with the previous row key, families and qualifiers can arrive in unsorted order
* in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them.
*/
protected ByteRangeSet familyDeduplicator;
protected ByteRangeSet qualifierDeduplicator;
protected ByteRangeSet tagsDeduplicator;
/*
* Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
* trie structure with nodes connected by memory pointers (not serializable yet).
*/
protected Tokenizer rowTokenizer;
protected Tokenizer familyTokenizer;
protected Tokenizer qualifierTokenizer;
protected Tokenizer tagsTokenizer;
/*
* Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
* all information to an output stream of bytes that can be stored on disk.
*/
protected RowSectionWriter rowWriter;
protected ColumnSectionWriter familyWriter;
protected ColumnSectionWriter qualifierWriter;
protected ColumnSectionWriter tagsWriter;
/*
* Integers used for counting cells and bytes. We keep track of the size of the Cells as if they
* were full KeyValues because some parts of HBase like to know the "unencoded size".
*/
protected int totalCells = 0;
protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
protected int totalValueBytes = 0;
protected int totalTagBytes = 0;
protected int maxValueLength = 0;
protected int maxTagLength = 0;
protected int totalBytes = 0;// total encoded bytes for the block, accumulated during compile()
/***************** construct ***********************/
public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) {
// used during cell accumulation
this.blockMeta = new PrefixTreeBlockMeta();
this.rowRange = new SimpleMutableByteRange();
this.familyRange = new SimpleMutableByteRange();
this.qualifierRange = new SimpleMutableByteRange();
this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES];
this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES];
this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES];
this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES];
this.values = new byte[VALUE_BUFFER_INIT_SIZE];
// used during compilation
this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
: new ByteRangeTreeSet();
this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
: new ByteRangeTreeSet();
this.timestampEncoder = new LongEncoder();
this.mvccVersionEncoder = new LongEncoder();
this.cellTypeEncoder = new CellTypeEncoder();
this.rowTokenizer = new Tokenizer();
this.familyTokenizer = new Tokenizer();
this.qualifierTokenizer = new Tokenizer();
this.rowWriter = new RowSectionWriter();
this.familyWriter = new ColumnSectionWriter();
this.qualifierWriter = new ColumnSectionWriter();
initializeTagHelpers();
reset(outputStream, includeMvccVersion);
}
public void reset(OutputStream outputStream, boolean includeMvccVersion) {
++numResets;
this.includeMvccVersion = includeMvccVersion;
this.outputStream = outputStream;
valueOffsets[0] = 0;
familyDeduplicator.reset();
qualifierDeduplicator.reset();
tagsDeduplicator.reset();
tagsWriter.reset();
tagsTokenizer.reset();
rowTokenizer.reset();
timestampEncoder.reset();
mvccVersionEncoder.reset();
cellTypeEncoder.reset();
familyTokenizer.reset();
qualifierTokenizer.reset();
rowWriter.reset();
familyWriter.reset();
qualifierWriter.reset();
totalCells = 0;
totalUnencodedBytes = 0;
totalValueBytes = 0;
maxValueLength = 0;
totalBytes = 0;
}
protected void initializeTagHelpers() {
this.tagsRange = new SimpleMutableByteRange();
this.tagsDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
: new ByteRangeTreeSet();
this.tagsTokenizer = new Tokenizer();
this.tagsWriter = new ColumnSectionWriter();
}
/**
* Check that the arrays used to hold cell fragments are large enough for the cell that is being
* added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
* first few block encodings but should stabilize quickly.
*/
protected void ensurePerCellCapacities() {
int currentCapacity = valueOffsets.length;
int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe
if (neededCapacity < currentCapacity) {
return;
}
int padding = neededCapacity;//this will double the array size
timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding);
mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding);
typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding);
valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding);
}
/******************** CellOutputStream methods *************************/
/**
* Note: Unused until support is added to the scanner/heap
* <p/>
* The following methods are optimized versions of write(Cell cell). The result should be
* identical; however, the implementation may be able to execute them much more efficiently
* because it does not need to compare the unchanged fields with the previous cell's.
* <p/>
* Consider the benefits during compaction when paired with a CellScanner that is also aware of
* row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells
* to the write(Cell cell) method.
* <p/>
* The savings of skipping duplicate row detection are significant with long row keys. A
* DataBlockEncoder may store a row key once in combination with a count of how many cells are in
* the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment
* of the counter, and that is for every cell in the row.
*/
/**
* Add a Cell to the output stream but repeat the previous row.
*/
//@Override
public void writeWithRepeatRow(Cell cell) {
ensurePerCellCapacities();//can we optimize away some of this?
//save a relatively expensive row comparison, incrementing the row's counter instead
rowTokenizer.incrementNumOccurrencesOfLatestValue();
addFamilyPart(cell);
addQualifierPart(cell);
addAfterRowFamilyQualifier(cell);
}
@Override
public void write(Cell cell) {
ensurePerCellCapacities();
rowTokenizer.addSorted(PrivateCellUtil.fillRowRange(cell, rowRange));
addFamilyPart(cell);
addQualifierPart(cell);
addTagPart(cell);
addAfterRowFamilyQualifier(cell);
}
private void addTagPart(Cell cell) {
PrivateCellUtil.fillTagRange(cell, tagsRange);
tagsDeduplicator.add(tagsRange);
}
/***************** internal add methods ************************/
private void addAfterRowFamilyQualifier(Cell cell){
// timestamps
timestamps[totalCells] = cell.getTimestamp();
timestampEncoder.add(cell.getTimestamp());
// memstore timestamps
if (includeMvccVersion) {
mvccVersions[totalCells] = cell.getSequenceId();
mvccVersionEncoder.add(cell.getSequenceId());
totalUnencodedBytes += WritableUtils.getVIntSize(cell.getSequenceId());
} else {
//must overwrite in case there was a previous version in this array slot
mvccVersions[totalCells] = 0L;
if (totalCells == 0) {// only need to do this for the first cell added
mvccVersionEncoder.add(0L);
}
//totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled
}
// types
typeBytes[totalCells] = cell.getTypeByte();
cellTypeEncoder.add(cell.getTypeByte());
// values
totalValueBytes += cell.getValueLength();
// double the array each time we run out of space
values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes);
CellUtil.copyValueTo(cell, values, valueOffsets[totalCells]);
if (cell.getValueLength() > maxValueLength) {
maxValueLength = cell.getValueLength();
}
valueOffsets[totalCells + 1] = totalValueBytes;
// general
totalUnencodedBytes += KeyValueUtil.length(cell);
++totalCells;
}
private void addFamilyPart(Cell cell) {
if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) {
PrivateCellUtil.fillFamilyRange(cell, familyRange);
familyDeduplicator.add(familyRange);
}
}
private void addQualifierPart(Cell cell) {
PrivateCellUtil.fillQualifierRange(cell, qualifierRange);
qualifierDeduplicator.add(qualifierRange);
}
/****************** compiling/flushing ********************/
/**
* Expensive method. The second half of the encoding work happens here.
*
* Take all the separate accumulated data structures and turn them into a single stream of bytes
* which is written to the outputStream.
*/
@Override
public void flush() throws IOException {
compile();
// do the actual flushing to the output stream. Order matters.
blockMeta.writeVariableBytesToOutputStream(outputStream);
rowWriter.writeBytes(outputStream);
familyWriter.writeBytes(outputStream);
qualifierWriter.writeBytes(outputStream);
tagsWriter.writeBytes(outputStream);
timestampEncoder.writeBytes(outputStream);
mvccVersionEncoder.writeBytes(outputStream);
//CellType bytes are in the row nodes. there is no additional type section
outputStream.write(values, 0, totalValueBytes);
}
/**
* Now that all the cells have been added, do the work to reduce them to a series of byte[]
* fragments that are ready to be written to the output stream.
*/
protected void compile(){
blockMeta.setNumKeyValueBytes(totalUnencodedBytes);
int lastValueOffset = valueOffsets[totalCells];
blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
blockMeta.setNumValueBytes(totalValueBytes);
totalBytes += totalTagBytes + totalValueBytes;
//these compile methods will add to totalBytes
compileTypes();
compileMvccVersions();
compileTimestamps();
compileTags();
compileQualifiers();
compileFamilies();
compileRows();
int numMetaBytes = blockMeta.calculateNumMetaBytes();
blockMeta.setNumMetaBytes(numMetaBytes);
totalBytes += numMetaBytes;
}
/**
* <p>
* The following "compile" methods do any intermediate work necessary to transform the cell
* fragments collected during the writing phase into structures that are ready to write to the
* outputStream.
* </p>
* The family and qualifier treatment is almost identical, as is timestamp and mvccVersion.
*/
protected void compileTypes() {
blockMeta.setAllSameType(cellTypeEncoder.areAllSameType());
if(cellTypeEncoder.areAllSameType()){
blockMeta.setAllTypes(cellTypeEncoder.getOnlyType());
}
}
protected void compileMvccVersions() {
mvccVersionEncoder.compile();
blockMeta.setMvccVersionFields(mvccVersionEncoder);
int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength();
totalBytes += numMvccVersionBytes;
}
protected void compileTimestamps() {
timestampEncoder.compile();
blockMeta.setTimestampFields(timestampEncoder);
int numTimestampBytes = timestampEncoder.getOutputArrayLength();
totalBytes += numTimestampBytes;
}
protected void compileQualifiers() {
blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
qualifierDeduplicator.compile();
qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, ColumnNodeType.QUALIFIER);
qualifierWriter.compile();
int numQualifierBytes = qualifierWriter.getNumBytes();
blockMeta.setNumQualifierBytes(numQualifierBytes);
totalBytes += numQualifierBytes;
}
protected void compileFamilies() {
blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
familyDeduplicator.compile();
familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
familyWriter.reconstruct(blockMeta, familyTokenizer, ColumnNodeType.FAMILY);
familyWriter.compile();
int numFamilyBytes = familyWriter.getNumBytes();
blockMeta.setNumFamilyBytes(numFamilyBytes);
totalBytes += numFamilyBytes;
}
protected void compileTags() {
blockMeta.setNumUniqueTags(tagsDeduplicator.size());
tagsDeduplicator.compile();
tagsTokenizer.addAll(tagsDeduplicator.getSortedRanges());
tagsWriter.reconstruct(blockMeta, tagsTokenizer, ColumnNodeType.TAGS);
tagsWriter.compile();
int numTagBytes = tagsWriter.getNumBytes();
blockMeta.setNumTagsBytes(numTagBytes);
totalBytes += numTagBytes;
}
protected void compileRows() {
rowWriter.reconstruct(this);
rowWriter.compile();
int numRowBytes = rowWriter.getNumBytes();
blockMeta.setNumRowBytes(numRowBytes);
blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth());
totalBytes += numRowBytes;
}
/********************* convenience getters ********************************/
public long getValueOffset(int index) {
return valueOffsets[index];
}
public int getValueLength(int index) {
return (int) (valueOffsets[index + 1] - valueOffsets[index]);
}
/************************* get/set *************************************/
public PrefixTreeBlockMeta getBlockMeta() {
return blockMeta;
}
public Tokenizer getRowTokenizer() {
return rowTokenizer;
}
public LongEncoder getTimestampEncoder() {
return timestampEncoder;
}
public int getTotalBytes() {
return totalBytes;
}
public long[] getTimestamps() {
return timestamps;
}
public long[] getMvccVersions() {
return mvccVersions;
}
public byte[] getTypeBytes() {
return typeBytes;
}
public LongEncoder getMvccVersionEncoder() {
return mvccVersionEncoder;
}
public ByteRangeSet getFamilySorter() {
return familyDeduplicator;
}
public ByteRangeSet getQualifierSorter() {
return qualifierDeduplicator;
}
public ByteRangeSet getTagSorter() {
return tagsDeduplicator;
}
public ColumnSectionWriter getFamilyWriter() {
return familyWriter;
}
public ColumnSectionWriter getQualifierWriter() {
return qualifierWriter;
}
public ColumnSectionWriter getTagWriter() {
return tagsWriter;
}
public RowSectionWriter getRowWriter() {
return rowWriter;
}
public ByteRange getValueByteRange() {
return new SimpleMutableByteRange(values, 0, totalValueBytes);
}
}
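
One detail worth making concrete from the encoder above: values are concatenated into a single byte[] and located later purely through the cumulative valueOffsets array, which carries one trailing entry (written in addAfterRowFamilyQualifier) so a value's length is just the difference of two neighboring offsets. A toy illustration with invented numbers:

// After appending three cell values of lengths 3, 5 and 2:
int[] valueOffsets = {0, 3, 8, 10};   // totalCells == 3, totalValueBytes == 10
int index = 1;
int offset = valueOffsets[index];                            // 3  (see getValueOffset)
int length = valueOffsets[index + 1] - valueOffsets[index];  // 5  (see getValueLength)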

View File

@ -1,136 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
/**
* <p>
* Column nodes can be family, qualifier, or tag nodes, since all of those sections encode the
* same way. The family, qualifier, and tag sections of the data block are each made of 1 or more
* of these nodes.
* </p>
* Each node is composed of 3 sections:<br>
* <ul>
* <li>tokenLength: UVInt (normally 1 byte) indicating the number of token bytes
* <li>token[]: the actual token bytes
* <li>parentStartPosition: the offset of the next node from the start of the family or qualifier
* section
* </ul>
*/
@InterfaceAudience.Private
public class ColumnNodeWriter{
/************* fields ****************************/
protected TokenizerNode builderNode;
protected PrefixTreeBlockMeta blockMeta;
protected int tokenLength;
protected byte[] token;
protected int parentStartPosition;
protected ColumnNodeType nodeType;
/*************** construct **************************/
public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
ColumnNodeType nodeType) {
this.blockMeta = blockMeta;
this.builderNode = builderNode;
this.nodeType = nodeType;
calculateTokenLength();
}
/************* methods *******************************/
public boolean isRoot() {
return parentStartPosition == 0;
}
private void calculateTokenLength() {
tokenLength = builderNode.getTokenLength();
token = new byte[tokenLength];
}
/**
* This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a
* placeholder.
* @param offsetWidthPlaceholder the placeholder
* @return node width
*/
public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) {
int width = 0;
width += UVIntTool.numBytes(tokenLength);
width += token.length;
width += offsetWidthPlaceholder;
return width;
}
public void writeBytes(OutputStream os) throws IOException {
int parentOffsetWidth;
if (this.nodeType == ColumnNodeType.FAMILY) {
parentOffsetWidth = blockMeta.getFamilyOffsetWidth();
} else if (this.nodeType == ColumnNodeType.QUALIFIER) {
parentOffsetWidth = blockMeta.getQualifierOffsetWidth();
} else {
parentOffsetWidth = blockMeta.getTagsOffsetWidth();
}
UVIntTool.writeBytes(tokenLength, os);
os.write(token);
UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os);
}
public void setTokenBytes(ByteRange source) {
source.deepCopySubRangeTo(0, tokenLength, token, 0);
}
/****************** standard methods ************************/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ",");
sb.append("[");
sb.append(Bytes.toString(token));
sb.append("]->");
sb.append(parentStartPosition);
return sb.toString();
}
/************************** get/set ***********************/
public void setParentStartPosition(int parentStartPosition) {
this.parentStartPosition = parentStartPosition;
}
}
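
A standalone sketch of the 3-section node layout described above: a variable-width token length, the raw token bytes, then a fixed-width offset back to the parent node. The writeUVInt and writeUFInt helpers are simplified stand-ins for UVIntTool/UFIntTool (the exact varint wire format of UVIntTool is not reproduced here), and the offsets are made up.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class ColumnNodeLayoutSketch {
  // simple base-128 varint; stands in for UVIntTool.writeBytes
  static void writeUVInt(int value, ByteArrayOutputStream os) {
    while (value >= 0x80) {
      os.write((value & 0x7f) | 0x80);
      value >>>= 7;
    }
    os.write(value);
  }

  // fixed-width big-endian unsigned int; stands in for UFIntTool.writeBytes
  static void writeUFInt(int width, int value, ByteArrayOutputStream os) {
    for (int b = width - 1; b >= 0; --b) {
      os.write((value >>> (8 * b)) & 0xff);
    }
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    byte[] token = "qual".getBytes(StandardCharsets.UTF_8);
    int parentStartPosition = 17;  // made-up offset of the parent within the column section
    int parentOffsetWidth = 1;     // chosen once per block, as blockMeta does for the real format

    writeUVInt(token.length, os);                            // tokenLength
    os.write(token);                                         // token[]
    writeUFInt(parentOffsetWidth, parentStartPosition, os);  // parentStartPosition

    System.out.println(os.size() + " bytes for this node");  // 1 + 4 + 1 = 6
  }
}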

View File

@ -1,209 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/**
* <p>
* Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family
* section is written after the row section, and qualifier section after family section.
* </p>
* The family and qualifier tries, or "column tries", are structured differently than the row trie.
* The trie cannot be reassembled without external data about the offsets of the leaf nodes, and
* these external pointers are stored in the nubs and leaves of the row trie. For each cell in a
* row, the row trie contains a list of offsets into the column sections (along with pointers to
* timestamps and other per-cell fields). These offsets point to the last column node/token that
* comprises the column name. To assemble the column name, the trie is traversed in reverse (right
* to left), with the rightmost tokens pointing to the start of their "parent" node which is the
* node to the left.
* <p>
* This choice was made to reduce the size of the column trie by storing the minimum amount of
* offset data. As a result, to find a specific qualifier within a row, you must do a binary search
* of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might
* encode the columns in both a forward and reverse trie, which would convert binary searches into
* more efficient trie searches which would be beneficial for wide rows.
* </p>
*/
@InterfaceAudience.Private
public class ColumnSectionWriter {
public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
/****************** fields ****************************/
private PrefixTreeBlockMeta blockMeta;
private ColumnNodeType nodeType;
private Tokenizer tokenizer;
private int numBytes = 0;
private ArrayList<TokenizerNode> nonLeaves;
private ArrayList<TokenizerNode> leaves;
private ArrayList<TokenizerNode> allNodes;
private ArrayList<ColumnNodeWriter> columnNodeWriters;
private List<Integer> outputArrayOffsets;
/*********************** construct *********************/
public ColumnSectionWriter() {
this.nonLeaves = Lists.newArrayList();
this.leaves = Lists.newArrayList();
this.outputArrayOffsets = Lists.newArrayList();
}
public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
ColumnNodeType nodeType) {
this();// init collections
reconstruct(blockMeta, builder, nodeType);
}
public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
ColumnNodeType nodeType) {
this.blockMeta = blockMeta;
this.tokenizer = builder;
this.nodeType = nodeType;
}
public void reset() {
numBytes = 0;
nonLeaves.clear();
leaves.clear();
outputArrayOffsets.clear();
}
/****************** methods *******************************/
public ColumnSectionWriter compile() {
if (this.nodeType == ColumnNodeType.FAMILY) {
// do nothing. max family length fixed at Byte.MAX_VALUE
} else if (this.nodeType == ColumnNodeType.QUALIFIER) {
blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
} else {
blockMeta.setMaxTagsLength(tokenizer.getMaxElementLength());
}
compilerInternals();
return this;
}
protected void compilerInternals() {
tokenizer.setNodeFirstInsertionIndexes();
tokenizer.appendNodes(nonLeaves, true, false);
tokenizer.appendNodes(leaves, false, true);
allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
allNodes.addAll(nonLeaves);
allNodes.addAll(leaves);
columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
for (int i = 0; i < allNodes.size(); ++i) {
TokenizerNode node = allNodes.get(i);
columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
}
// leaf widths are known at this point, so add them up
int totalBytesWithoutOffsets = 0;
for (int i = allNodes.size() - 1; i >= 0; --i) {
ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
// every node's width, using a zero-byte placeholder for the not-yet-known parent offset width
totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
}
// figure out how wide our offset FInts are
int parentOffsetWidth = 0;
while (true) {
++parentOffsetWidth;
int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
numBytes = numBytesFinder;
break;// it fits in this offset width
}
}
if (this.nodeType == ColumnNodeType.FAMILY) {
blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
} else if (this.nodeType == ColumnNodeType.QUALIFIER) {
blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
} else {
blockMeta.setTagsOffsetWidth(parentOffsetWidth);
}
int forwardIndex = 0;
for (int i = 0; i < allNodes.size(); ++i) {
TokenizerNode node = allNodes.get(i);
ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
int fullNodeWidth = columnNodeWriter
.getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
node.setOutputArrayOffset(forwardIndex);
columnNodeWriter.setTokenBytes(node.getToken());
if (node.isRoot()) {
columnNodeWriter.setParentStartPosition(0);
} else {
columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
}
forwardIndex += fullNodeWidth;
}
tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
}
public void writeBytes(OutputStream os) throws IOException {
for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
columnNodeWriter.writeBytes(os);
}
}
/************* get/set **************************/
public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
return columnNodeWriters;
}
public int getNumBytes() {
return numBytes;
}
public int getOutputArrayOffset(int sortedIndex) {
return outputArrayOffsets.get(sortedIndex);
}
public ArrayList<TokenizerNode> getNonLeaves() {
return nonLeaves;
}
public ArrayList<TokenizerNode> getLeaves() {
return leaves;
}
}
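
To make the reverse traversal described in the class comment concrete, here is a toy model (plain objects rather than the real byte format) of reassembling a qualifier from the offset a row-trie cell stores: start at the last node, prepend each token, and follow parent offsets until the node at offset 0, the root, is reached. All offsets and tokens are invented for illustration.

import java.util.HashMap;
import java.util.Map;

public class ColumnReassemblySketch {
  static final class Node {
    final String token;
    final int parentOffset;
    Node(String token, int parentOffset) { this.token = token; this.parentOffset = parentOffset; }
  }

  public static void main(String[] args) {
    // the column section, keyed by each node's offset within the section
    Map<Integer, Node> section = new HashMap<>();
    section.put(0, new Node("col", 0));     // root node holding the shared prefix
    section.put(10, new Node("umn_a", 0));  // last node of qualifier "column_a"
    section.put(20, new Node("umn_b", 0));  // last node of qualifier "column_b"

    // a cell in the row trie stores the offset of the LAST node of its qualifier
    int offset = 20;
    StringBuilder qualifier = new StringBuilder();
    while (true) {
      Node node = section.get(offset);
      qualifier.insert(0, node.token);  // prepend: the trie is walked right-to-left
      if (offset == 0) {
        break;                          // reached the root
      }
      offset = node.parentOffset;
    }
    System.out.println(qualifier);      // column_b
  }
}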

View File

@ -1,68 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each
* KV. If all cells share one type, then during conversion to byte[] we can store the single
* "onlyType" in blockMeta instead of repeating it for each cell, saving 1 byte per cell.
*/
@InterfaceAudience.Private
public class CellTypeEncoder {
/************* fields *********************/
protected boolean pendingFirstType = true;
protected boolean allSameType = true;
protected byte onlyType;
/************* construct *********************/
public void reset() {
pendingFirstType = true;
allSameType = true;
}
/************* methods *************************/
public void add(byte type) {
if (pendingFirstType) {
onlyType = type;
pendingFirstType = false;
} else if (onlyType != type) {
allSameType = false;
}
}
/**************** get/set **************************/
public boolean areAllSameType() {
return allSameType;
}
public byte getOnlyType() {
return onlyType;
}
}
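
A small usage illustration of the optimization described above, assuming the KeyValue.Type codes from hbase-common; once a second distinct type is seen, the per-cell type byte has to be kept.

CellTypeEncoder typeEncoder = new CellTypeEncoder();
typeEncoder.add(KeyValue.Type.Put.getCode());
typeEncoder.add(KeyValue.Type.Put.getCode());
// typeEncoder.areAllSameType() == true: blockMeta can hold the one type byte for the whole block
typeEncoder.add(KeyValue.Type.DeleteColumn.getCode());
// typeEncoder.areAllSameType() == false: each cell now carries its own type byte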

View File

@ -1,28 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Specifies the type of columnnode writer.
*/
@InterfaceAudience.Private
public enum ColumnNodeType {
FAMILY, QUALIFIER, TAGS;
}

View File

@ -1,183 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
/**
* Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
* collection of Cells.
*
* 1. add longs to a HashSet for fast de-duplication
* 2. keep track of the min and max
* 3. copy all values to a new long[]
* 4. Arrays.sort the long[]
* 5. calculate maxDelta = max - min
* 6. determine FInt width based on maxDelta
* 7. PrefixTreeEncoder binary searches to find index of each value
*/
@InterfaceAudience.Private
public class LongEncoder {
/****************** fields ****************************/
protected HashSet<Long> uniqueValues;
protected long[] sortedUniqueValues;
protected long min, max, maxDelta;
protected int bytesPerDelta;
protected int bytesPerIndex;
protected int totalCompressedBytes;
/****************** construct ****************************/
public LongEncoder() {
this.uniqueValues = new HashSet<>();
}
public void reset() {
uniqueValues.clear();
sortedUniqueValues = null;
min = Long.MAX_VALUE;
max = Long.MIN_VALUE;
maxDelta = Long.MIN_VALUE;
bytesPerIndex = 0;
bytesPerDelta = 0;
totalCompressedBytes = 0;
}
/************* methods ***************************/
public void add(long timestamp) {
uniqueValues.add(timestamp);
}
public LongEncoder compile() {
int numUnique = uniqueValues.size();
if (numUnique == 1) {
min = CollectionUtils.getFirst(uniqueValues);
sortedUniqueValues = new long[] { min };
return this;
}
sortedUniqueValues = new long[numUnique];
int lastIndex = -1;
for (long value : uniqueValues) {
sortedUniqueValues[++lastIndex] = value;
}
Arrays.sort(sortedUniqueValues);
min = ArrayUtils.getFirst(sortedUniqueValues);
max = ArrayUtils.getLast(sortedUniqueValues);
maxDelta = max - min;
if (maxDelta > 0) {
bytesPerDelta = UFIntTool.numBytes(maxDelta);
} else {
bytesPerDelta = 0;
}
int maxIndex = numUnique - 1;
bytesPerIndex = UFIntTool.numBytes(maxIndex);
totalCompressedBytes = numUnique * bytesPerDelta;
return this;
}
public long getDelta(int index) {
if (sortedUniqueValues.length == 0) {
return 0;
}
return sortedUniqueValues[index] - min;
}
public int getIndex(long value) {
// should always find an exact match
return Arrays.binarySearch(sortedUniqueValues, value);
}
public void writeBytes(OutputStream os) throws IOException {
for (int i = 0; i < sortedUniqueValues.length; ++i) {
long delta = sortedUniqueValues[i] - min;
UFIntTool.writeBytes(bytesPerDelta, delta, os);
}
}
//convenience method for tests
public byte[] getByteArray() throws IOException{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
writeBytes(baos);
return baos.toByteArray();
}
public int getOutputArrayLength() {
return sortedUniqueValues.length * bytesPerDelta;
}
public int getNumUniqueValues() {
return sortedUniqueValues.length;
}
/******************* Object methods **********************/
@Override
public String toString() {
if (ArrayUtils.isEmpty(sortedUniqueValues)) {
return "[]";
}
return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
}
/******************** get/set **************************/
public long getMin() {
return min;
}
public int getBytesPerDelta() {
return bytesPerDelta;
}
public int getBytesPerIndex() {
return bytesPerIndex;
}
public int getTotalCompressedBytes() {
return totalCompressedBytes;
}
public long[] getSortedUniqueTimestamps() {
return sortedUniqueValues;
}
}
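
A worked sketch of the numbered steps in the class comment, with concrete values; the byte-width computation here is a simplification of what UFIntTool.numBytes decides in the real code.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class LongEncoderSketch {
  public static void main(String[] args) {
    long[] timestamps = {1000L, 1000L, 1003L, 1500L};

    // 1-2: de-duplicate and track min/max
    Set<Long> unique = new HashSet<>();
    for (long t : timestamps) unique.add(t);

    // 3-4: copy to long[] and sort
    long[] sorted = unique.stream().mapToLong(Long::longValue).toArray();
    Arrays.sort(sorted);                       // [1000, 1003, 1500]

    // 5-6: maxDelta decides how many bytes each stored delta needs
    long min = sorted[0], max = sorted[sorted.length - 1];
    long maxDelta = max - min;                 // 500 needs 9 bits, so 2 bytes per delta
    int bytesPerDelta = (64 - Long.numberOfLeadingZeros(maxDelta) + 7) / 8;

    // 7: the encoder later binary-searches to map a cell's value to its index
    int index = Arrays.binarySearch(sorted, 1003L);   // 1
    System.out.println(bytesPerDelta + " byte(s) per delta, index=" + index);
  }
}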

View File

@ -1,300 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
/**
* Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
* Please see the write() method for the order in which data is written.
*/
@InterfaceAudience.Private
public class RowNodeWriter{
protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);
/********************* fields ******************************/
protected PrefixTreeEncoder prefixTreeEncoder;
protected PrefixTreeBlockMeta blockMeta;
protected TokenizerNode tokenizerNode;
protected int tokenWidth;
protected int fanOut;
protected int numCells;
protected int width;
/*********************** construct *************************/
public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
reconstruct(keyValueBuilder, tokenizerNode);
}
public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
this.prefixTreeEncoder = prefixTreeEncoder;
reset(tokenizerNode);
}
public void reset(TokenizerNode node) {
this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks
this.tokenizerNode = node;
this.tokenWidth = 0;
this.fanOut = 0;
this.numCells = 0;
this.width = 0;
calculateOffsetsAndLengths();
}
/********************* methods ****************************/
protected void calculateOffsetsAndLengths() {
tokenWidth = tokenizerNode.getTokenLength();
if (!tokenizerNode.isRoot()) {
--tokenWidth;// non-root nodes omit their first token byte; it is written out in the parent's fan
}
fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
numCells = tokenizerNode.getNumOccurrences();
}
public int calculateWidth(){
calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
return width;
}
public int calculateWidthOverrideOffsetWidth(int offsetWidth){
width = 0;
width += UVIntTool.numBytes(tokenWidth);
width += tokenWidth;
width += UVIntTool.numBytes(fanOut);
width += fanOut;
width += UVIntTool.numBytes(numCells);
if(tokenizerNode.hasOccurrences()){
int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
+ blockMeta.getQualifierOffsetWidth()
+ blockMeta.getTagsOffsetWidth()
+ blockMeta.getTimestampIndexWidth()
+ blockMeta.getMvccVersionIndexWidth()
+ blockMeta.getKeyValueTypeWidth()
+ blockMeta.getValueOffsetWidth()
+ blockMeta.getValueLengthWidth();
width += numCells * fixedBytesPerCell;
}
if (!tokenizerNode.isLeaf()) {
width += fanOut * offsetWidth;
}
return width;
}
/*********************** writing the compiled structure to the OutputStream ***************/
public void write(OutputStream os) throws IOException{
//info about this row trie node
writeRowToken(os);
writeFan(os);
writeNumCells(os);
//UFInt indexes and offsets for each cell in the row (if nub or leaf)
writeFamilyNodeOffsets(os);
writeQualifierNodeOffsets(os);
writeTagNodeOffsets(os);
writeTimestampIndexes(os);
writeMvccVersionIndexes(os);
writeCellTypes(os);
writeValueOffsets(os);
writeValueLengths(os);
//offsets to the children of this row trie node (if branch or nub)
writeNextRowTrieNodeOffsets(os);
}
/**
* Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
* fields can reproduce all the row keys that compose the block.
*/
/**
* UVInt: tokenWidth
* bytes: token
*/
protected void writeRowToken(OutputStream os) throws IOException {
UVIntTool.writeBytes(tokenWidth, os);
int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
ByteRangeUtils.write(os, tokenizerNode.getToken(), tokenStartIndex);
}
/**
* UVInt: numFanBytes/fanOut
* bytes: each fan byte
*/
public void writeFan(OutputStream os) throws IOException {
UVIntTool.writeBytes(fanOut, os);
if (fanOut <= 0) {
return;
}
ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
os.write(child.getToken().get(0));// first byte of each child's token
}
}
/**
* UVInt: numCells, the number of cells in this row which will be 0 for branch nodes
*/
protected void writeNumCells(OutputStream os) throws IOException {
UVIntTool.writeBytes(numCells, os);
}
/**
* The following methods write data for each cell in the row, mostly consisting of indexes or
* offsets into the timestamp/column data structures that are written in the middle of the block.
* We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
* search of a particular column/timestamp combination.
* <p>
* Branch nodes will not have any data in these sections.
* </p>
*/
protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
if (blockMeta.getFamilyOffsetWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
.getFirstInsertionIndex() + i : 0;
int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
cellInsertionIndex);
int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
sortedIndex);
UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
}
}
protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
if (blockMeta.getQualifierOffsetWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
cellInsertionIndex);
int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
sortedIndex);
UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
}
}
protected void writeTagNodeOffsets(OutputStream os) throws IOException {
if (blockMeta.getTagsOffsetWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
int sortedIndex = prefixTreeEncoder.getTagSorter().getSortedIndexForInsertionId(
cellInsertionIndex);
int indexedTagOffset = prefixTreeEncoder.getTagWriter().getOutputArrayOffset(
sortedIndex);
UFIntTool.writeBytes(blockMeta.getTagsOffsetWidth(), indexedTagOffset, os);
}
}
protected void writeTimestampIndexes(OutputStream os) throws IOException {
if (blockMeta.getTimestampIndexWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
}
}
protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
if (blockMeta.getMvccVersionIndexWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
}
}
protected void writeCellTypes(OutputStream os) throws IOException {
if (blockMeta.isAllSameType()) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
}
}
protected void writeValueOffsets(OutputStream os) throws IOException {
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
}
}
protected void writeValueLengths(OutputStream os) throws IOException {
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
}
}
/**
* If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
*/
protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
}
}
}
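
As a concrete illustration of calculateWidthOverrideOffsetWidth, here is the arithmetic for a hypothetical nub with a 3-byte token, two children, and one cell. All per-field widths are invented stand-ins for what blockMeta would report, and the UVInts of these small values are assumed to take a single byte each.

int tokenWidth = 3, fanOut = 2, numCells = 1;
int fixedBytesPerCell = 1 /* family offset */ + 2 /* qualifier offset */ + 0 /* tags */
    + 1 /* timestamp index */ + 0 /* mvcc index */ + 0 /* type, all same */
    + 2 /* value offset */ + 1 /* value length */;          // == 7
int nextNodeOffsetWidth = 2;

int width = 1 + tokenWidth                 // UVInt(tokenWidth) + token bytes      = 4
          + 1 + fanOut                     // UVInt(fanOut) + one byte per child   = 3
          + 1                              // UVInt(numCells)                      = 1
          + numCells * fixedBytesPerCell   // per-cell offsets and indexes         = 7
          + fanOut * nextNodeOffsetWidth;  // child pointers (nub is not a leaf)   = 4
// width == 19 bytes for this row node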

View File

@ -1,219 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/**
* Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
* key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
* also contains references to offsets in the other sections of the data block that enable the
* decoder to match a row key with its qualifier, timestamp, type, value, etc.
* <p>
* The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
* internals of each row node.
*/
@InterfaceAudience.Private
public class RowSectionWriter {
/***************** fields **************************/
protected PrefixTreeEncoder prefixTreeEncoder;
protected PrefixTreeBlockMeta blockMeta;
protected int numBytes;
protected ArrayList<TokenizerNode> nonLeaves;
protected ArrayList<TokenizerNode> leaves;
protected ArrayList<RowNodeWriter> leafWriters;
protected ArrayList<RowNodeWriter> nonLeafWriters;
protected int numLeafWriters;
protected int numNonLeafWriters;
/********************* construct **********************/
public RowSectionWriter() {
this.nonLeaves = Lists.newArrayList();
this.leaves = Lists.newArrayList();
this.leafWriters = Lists.newArrayList();
this.nonLeafWriters = Lists.newArrayList();
}
public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
reconstruct(prefixTreeEncoder);
}
public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
this.prefixTreeEncoder = prefixTreeEncoder;
this.blockMeta = prefixTreeEncoder.getBlockMeta();
reset();
}
public void reset() {
numBytes = 0;
nonLeaves.clear();
leaves.clear();
numLeafWriters = 0;
numNonLeafWriters = 0;
}
/****************** methods *******************************/
public RowSectionWriter compile() {
blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
// track the starting position of each node in final output
int negativeIndex = 0;
// create leaf writer nodes
// leaf widths are known at this point, so add them up
int totalLeafBytes = 0;
for (int i = leaves.size() - 1; i >= 0; --i) {
TokenizerNode leaf = leaves.get(i);
RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
++numLeafWriters;
// leaves store all but their first token byte
int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
totalLeafBytes += leafNodeWidth;
negativeIndex += leafNodeWidth;
leaf.setNegativeIndex(negativeIndex);
}
int totalNonLeafBytesWithoutOffsets = 0;
int totalChildPointers = 0;
for (int i = nonLeaves.size() - 1; i >= 0; --i) {
TokenizerNode nonLeaf = nonLeaves.get(i);
RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
++numNonLeafWriters;
totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
totalChildPointers += nonLeaf.getNumChildren();
}
// figure out how wide our offset FInts are
int offsetWidth = 0;
while (true) {
++offsetWidth;
int offsetBytes = totalChildPointers * offsetWidth;
int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
// it fits
numBytes = totalRowBytes;
break;
}
}
blockMeta.setNextNodeOffsetWidth(offsetWidth);
// populate negativeIndexes
for (int i = nonLeaves.size() - 1; i >= 0; --i) {
TokenizerNode nonLeaf = nonLeaves.get(i);
int writerIndex = nonLeaves.size() - i - 1;
RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
int nodeWidth = nonLeafWriter.calculateWidth();
negativeIndex += nodeWidth;
nonLeaf.setNegativeIndex(negativeIndex);
}
return this;
}
protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
TokenizerNode builderNode) {
RowNodeWriter rowNodeWriter = null;
//check if there is an existing node we can recycle
if (index >= list.size()) {
//there are not enough existing nodes, so add a new one which will be retrieved below
list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
}
rowNodeWriter = list.get(index);
rowNodeWriter.reset(builderNode);
return rowNodeWriter;
}
public void writeBytes(OutputStream os) throws IOException {
for (int i = numNonLeafWriters - 1; i >= 0; --i) {
RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
nonLeafWriter.write(os);
}
// duplicates above... written more for clarity right now
for (int i = numLeafWriters - 1; i >= 0; --i) {
RowNodeWriter leafWriter = leafWriters.get(i);
leafWriter.write(os);
}
}
/***************** static ******************************/
protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
ArrayList<TokenizerNode> ins, boolean leaves) {
ArrayList<TokenizerNode> outs = Lists.newArrayList();
for (int i = ins.size() - 1; i >= 0; --i) {
TokenizerNode n = ins.get(i);
      if ((n.isLeaf() && leaves) || (!n.isLeaf() && !leaves)) {
outs.add(ins.get(i));
}
}
return outs;
}
/************* get/set **************************/
public int getNumBytes() {
return numBytes;
}
public ArrayList<TokenizerNode> getNonLeaves() {
return nonLeaves;
}
public ArrayList<TokenizerNode> getLeaves() {
return leaves;
}
public ArrayList<RowNodeWriter> getNonLeafWriters() {
return nonLeafWriters;
}
public ArrayList<RowNodeWriter> getLeafWriters() {
return leafWriters;
}
}
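
The offset-width loop in compile() above can be read as a small piece of arithmetic: grow the width until the whole row section, including the offsets themselves, fits under the largest value that width can express. The sketch below restates it with made-up pointer and byte counts; only UFIntTool.maxValueForNumBytes(..) is the real API, and the class name is invented.

import org.apache.hadoop.hbase.util.vint.UFIntTool;

public class OffsetWidthSketch {
  // Same search as RowSectionWriter.compile(): widen the offset until everything fits.
  static int chooseOffsetWidth(int totalChildPointers, int totalNodeBytesWithoutOffsets) {
    int offsetWidth = 0;
    while (true) {
      ++offsetWidth;
      int totalRowBytes = totalNodeBytesWithoutOffsets + totalChildPointers * offsetWidth;
      if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
        return offsetWidth;
      }
    }
  }

  public static void main(String[] args) {
    // 40 child pointers, 700 bytes of node content: width 1 gives 740 (too big for 255),
    // width 2 gives 780 (< 65535), so every next-node offset is written as a 2-byte UFInt.
    System.out.println(chooseOffsetWidth(40, 700)); // prints 2
  }
}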

View File

@ -1,241 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
import java.util.ArrayList;
import java.util.List;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/**
* Data structure used in the first stage of PrefixTree encoding:
* <ul>
* <li>accepts a sorted stream of ByteRanges
* <li>splits them into a set of tokens, each held by a {@link TokenizerNode}
* <li>connects the TokenizerNodes via standard java references
* <li>keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content
* </ul>
* <p><br>
* Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier
* encoding.
*/
@InterfaceAudience.Private
public class Tokenizer{
/***************** fields **************************/
protected int numArraysAdded = 0;
protected long lastNodeId = -1;
protected ArrayList<TokenizerNode> nodes;
protected int numNodes;
protected TokenizerNode root;
protected byte[] tokens;
protected int tokensLength;
protected int maxElementLength = 0;
// number of levels in the tree assuming root level is 0
protected int treeDepth = 0;
/******************* construct *******************/
public Tokenizer() {
this.nodes = Lists.newArrayList();
this.tokens = new byte[0];
}
public void reset() {
numArraysAdded = 0;
lastNodeId = -1;
numNodes = 0;
tokensLength = 0;
root = null;
maxElementLength = 0;
treeDepth = 0;
}
/***************** building *************************/
public void addAll(ArrayList<ByteRange> sortedByteRanges) {
for (int i = 0; i < sortedByteRanges.size(); ++i) {
ByteRange byteRange = sortedByteRanges.get(i);
addSorted(byteRange);
}
}
public void addSorted(final ByteRange bytes) {
++numArraysAdded;
if (bytes.getLength() > maxElementLength) {
maxElementLength = bytes.getLength();
}
if (root == null) {
// nodeDepth of firstNode (non-root) is 1
root = addNode(null, 1, 0, bytes, 0);
} else {
root.addSorted(bytes);
}
}
public void incrementNumOccurrencesOfLatestValue(){
CollectionUtils.getLast(nodes).incrementNumOccurrences(1);
}
protected long nextNodeId() {
return ++lastNodeId;
}
protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset,
final ByteRange token, int inputTokenOffset) {
int inputTokenLength = token.getLength() - inputTokenOffset;
int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset);
TokenizerNode node = null;
if (nodes.size() <= numNodes) {
node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset,
inputTokenLength);
nodes.add(node);
} else {
node = nodes.get(numNodes);
node.reset();
node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength);
}
++numNodes;
return node;
}
protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) {
int newOffset = tokensLength;
int inputTokenLength = token.getLength() - inputTokenOffset;
int newMinimum = tokensLength + inputTokenLength;
tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum);
token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength);
tokensLength += inputTokenLength;
return newOffset;
}
protected void submitMaxNodeDepthCandidate(int nodeDepth) {
if (nodeDepth > treeDepth) {
treeDepth = nodeDepth;
}
}
/********************* read ********************/
public int getNumAdded(){
return numArraysAdded;
}
// for debugging
public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves) {
ArrayList<TokenizerNode> nodes = Lists.newArrayList();
root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves);
return nodes;
}
public void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves,
boolean includeLeaves) {
root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
}
public List<byte[]> getArrays() {
List<TokenizerNode> nodes = new ArrayList<>();
root.appendNodesToExternalList(nodes, true, true);
List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes));
for (int i = 0; i < nodes.size(); ++i) {
TokenizerNode node = nodes.get(i);
for (int j = 0; j < node.getNumOccurrences(); ++j) {
byte[] byteArray = node.getNewByteArray();
byteArrays.add(byteArray);
}
}
return byteArrays;
}
//currently unused, but working and possibly useful in the future
public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
int keyLength) {
root.getNode(resultHolder, key, keyOffset, keyLength);
}
/********************** write ***************************/
public Tokenizer setNodeFirstInsertionIndexes() {
root.setInsertionIndexes(0);
return this;
}
public Tokenizer appendOutputArrayOffsets(List<Integer> offsets) {
root.appendOutputArrayOffsets(offsets);
return this;
}
/********************* print/debug ********************/
protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false;
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(getStructuralString());
if (INCLUDE_FULL_TREE_IN_TO_STRING) {
for (byte[] bytes : getArrays()) {
if (sb.length() > 0) {
sb.append("\n");
}
sb.append(Bytes.toString(bytes));
}
}
return sb.toString();
}
public String getStructuralString() {
List<TokenizerNode> nodes = getNodes(true, true);
StringBuilder sb = new StringBuilder();
for (TokenizerNode node : nodes) {
String line = node.getPaddedTokenAndOccurrenceString();
sb.append(line + "\n");
}
return sb.toString();
}
/****************** get/set ************************/
public TokenizerNode getRoot() {
return root;
}
public int getMaxElementLength() {
return maxElementLength;
}
public int getTreeDepth() {
return treeDepth;
}
}
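
For reference, a minimal sketch of how the Tokenizer above is driven, assuming row keys arrive already sorted. The wrapper class name is invented; the row strings follow the TokenizerNode javadoc example below, and Tokenizer, SimpleMutableByteRange and Bytes are the real classes.

import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;

public class TokenizerUsageSketch {
  public static void main(String[] args) {
    Tokenizer tokenizer = new Tokenizer();
    // inputs must be added in sorted order; duplicates just bump numOccurrences on a node
    for (String row : new String[] { "AAA", "AAA", "AAB", "AAB", "AAB", "AABQQ", "AABQQ" }) {
      tokenizer.addSorted(new SimpleMutableByteRange(Bytes.toBytes(row)));
    }
    // per the TokenizerNode javadoc below: branch "AA", leaf "A" x2, nub "B" x3, leaf "QQ" x2
    System.out.println(tokenizer.getStructuralString());
  }
}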

View File

@ -1,639 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
import java.util.ArrayList;
import java.util.List;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/**
* Individual node in a Trie structure. Each node is one of 3 types:
* <ul>
* <li>Branch: an internal trie node that may have a token and must have multiple children, but does
* not represent an actual input byte[], hence its numOccurrences is 0
 * <li>Leaf: a node with no children and where numOccurrences is &gt;= 1. Its token represents the
* last bytes in the input byte[]s.
* <li>Nub: a combination of a branch and leaf. Its token represents the last bytes of input
* byte[]s and has numOccurrences &gt;= 1, but it also has child nodes which represent input byte[]s
 * that add bytes to this node's input byte[].
* </ul>
* <br><br>
* Example inputs (numInputs=7):
* 0: AAA
* 1: AAA
* 2: AAB
* 3: AAB
* 4: AAB
* 5: AABQQ
* 6: AABQQ
* <br><br>
* Resulting TokenizerNodes:
* AA &lt;- branch, numOccurrences=0, tokenStartOffset=0, token.length=2
* A &lt;- leaf, numOccurrences=2, tokenStartOffset=2, token.length=1
* B &lt;- nub, numOccurrences=3, tokenStartOffset=2, token.length=1
* QQ &lt;- leaf, numOccurrences=2, tokenStartOffset=3, token.length=2
* <br><br>
* numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2
*/
@InterfaceAudience.Private
public class TokenizerNode{
/*
* Ref to data structure wrapper
*/
protected Tokenizer builder;
/******************************************************************
* Tree content/structure used during tokenization
* ****************************************************************/
/*
* ref to parent trie node
*/
protected TokenizerNode parent;
/*
* node depth in trie, irrespective of each node's token length
*/
protected int nodeDepth;
/*
* start index of this token in original byte[]
*/
protected int tokenStartOffset;
/*
* bytes for this trie node. can be length 0 in root node
*/
protected ByteRange token;
/*
* A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for
* nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing
* that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode.
*/
protected int numOccurrences;
/*
 * The maximum fan-out of a byte[] trie is 256, so a node can have at most 256
 * child nodes.
*/
protected ArrayList<TokenizerNode> children;
/*
* Fields used later in the encoding process for sorting the nodes into the order they'll be
* written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer
* are not generic data structures but instead are specific to HBase PrefixTree encoding.
*/
/*
* unique id assigned to each TokenizerNode
*/
protected long id;
/*
* set >=0 for nubs and leaves
*/
protected int firstInsertionIndex = -1;
/*
* A positive value indicating how many bytes before the end of the block this node will start. If
 * the section is 55 bytes and negativeIndex is 9, then the node will start at 46.
*/
protected int negativeIndex = 0;
/*
* The offset in the output array at which to start writing this node's token bytes. Influenced
* by the lengths of all tokens sorted before this one.
*/
protected int outputArrayOffset = -1;
/*********************** construct *****************************/
public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
int tokenStartOffset, int tokenOffset, int tokenLength) {
this.token = new SimpleMutableByteRange();
reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
this.children = Lists.newArrayList();
}
/*
* Sub-constructor for initializing all fields without allocating a new object. Used by the
* regular constructor.
*/
public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
int tokenStartOffset, int tokenOffset, int tokenLength) {
this.builder = builder;
this.id = builder.nextNodeId();
this.parent = parent;
this.nodeDepth = nodeDepth;
builder.submitMaxNodeDepthCandidate(nodeDepth);
this.tokenStartOffset = tokenStartOffset;
this.token.set(builder.tokens, tokenOffset, tokenLength);
this.numOccurrences = 1;
}
/*
* Clear the state of this node so that it looks like it was just allocated.
*/
public void reset() {
builder = null;
parent = null;
nodeDepth = 0;
tokenStartOffset = 0;
token.unset();
numOccurrences = 0;
children.clear();// branches & nubs
// ids/offsets. used during writing to byte[]
id = 0;
firstInsertionIndex = -1;// set >=0 for nubs and leaves
negativeIndex = 0;
outputArrayOffset = -1;
}
/************************* building *********************************/
/*
* <li>Only public method used during the tokenization process
* <li>Requires that the input ByteRange sort after the previous, and therefore after all previous
* inputs
* <li>Only looks at bytes of the input array that align with this node's token
*/
public void addSorted(final ByteRange bytes) {// recursively build the tree
/*
* Recurse deeper into the existing trie structure
*/
if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
TokenizerNode lastChild = CollectionUtils.getLast(children);
if (lastChild.partiallyMatchesToken(bytes)) {
lastChild.addSorted(bytes);
return;
}
}
/*
* Recursion ended. We must either
* <li>1: increment numOccurrences if this input was equal to the previous
* <li>2: convert this node from a leaf to a nub, and add a new child leaf
* <li>3: split this node into a branch and leaf, and then add a second leaf
*/
// add it as a child of this node
int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length
int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
int tailLength = bytes.getLength() - tailOffset;
if (numIdenticalTokenBytes == token.getLength()) {
if (tailLength == 0) {// identical to this node (case 1)
incrementNumOccurrences(1);
} else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2)
int childNodeDepth = nodeDepth + 1;
int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
bytes, tailOffset);
addChild(newChildNode);
}
} else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3)
split(numIdenticalTokenBytes, bytes);
}
}
protected void addChild(TokenizerNode node) {
node.setParent(this);
children.add(node);
}
/**
* Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the
* method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output
* will be 3 nodes:<br>
* <ul>
* <li>1: B &lt;- branch
* <li>2: AA &lt;- leaf
* <li>3: OO &lt;- leaf
* </ul>
*
* @param numTokenBytesToRetain =&gt; 1 (the B)
* @param bytes =&gt; BOO
*/
protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
int childNodeDepth = nodeDepth;
int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;
//create leaf AA
TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
token, numTokenBytesToRetain);
firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences
token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B
numOccurrences = 0;//current node is now a branch
moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B)
addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children
//create leaf OO
TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
bytes, tokenStartOffset + numTokenBytesToRetain);
    addChild(secondChild);//add the new leaf (OO) to the branch's (B's) children
// we inserted branch node B as a new level above/before the two children, so increment the
// depths of the children below
firstChild.incrementNodeDepthRecursively();
secondChild.incrementNodeDepthRecursively();
}
protected void incrementNodeDepthRecursively() {
++nodeDepth;
builder.submitMaxNodeDepthCandidate(nodeDepth);
for (int i = 0; i < children.size(); ++i) {
children.get(i).incrementNodeDepthRecursively();
}
}
protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
child.setParent(newParent);
newParent.children.add(child);
}
children.clear();
}
/************************ byte[] utils *************************/
protected boolean partiallyMatchesToken(ByteRange bytes) {
return numIdenticalBytes(bytes) > 0;
}
protected boolean matchesToken(ByteRange bytes) {
return numIdenticalBytes(bytes) == getTokenLength();
}
protected int numIdenticalBytes(ByteRange bytes) {
return ByteRangeUtils.numEqualPrefixBytes(token, bytes, tokenStartOffset);
}
/***************** moving nodes around ************************/
public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
boolean includeLeaves) {
    if ((includeNonLeaves && !isLeaf()) || (includeLeaves && isLeaf())) {
appendTo.add(this);
}
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
}
}
public int setInsertionIndexes(int nextIndex) {
int newNextIndex = nextIndex;
if (hasOccurrences()) {
setFirstInsertionIndex(nextIndex);
newNextIndex += numOccurrences;
}
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
newNextIndex = child.setInsertionIndexes(newNextIndex);
}
return newNextIndex;
}
public void appendOutputArrayOffsets(List<Integer> offsets) {
if (hasOccurrences()) {
offsets.add(outputArrayOffset);
}
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
child.appendOutputArrayOffsets(offsets);
}
}
/***************** searching *********************************/
/*
* Do a trie style search through the tokenizer. One option for looking up families or qualifiers
* during encoding, but currently unused in favor of tracking this information as they are added.
*
* Keeping code pending further performance testing.
*/
public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
int keyLength) {
int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();
// quick check if the key is shorter than this node (may not work for binary search)
if (CollectionUtils.isEmpty(children)) {
if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
return;
}
}
// all token bytes must match
for (int i = 0; i < token.getLength(); ++i) {
if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
// TODO return whether it's before or after so we can binary search
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
return;
}
}
if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH
return;
}
if (CollectionUtils.notEmpty(children)) {
// TODO binary search the children
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
child.getNode(resultHolder, key, keyOffset, keyLength);
if (resultHolder.isMatch()) {
return;
} else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
// passed it, so it doesn't exist
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
return;
}
// key is still AFTER the current node, so continue searching
}
}
// checked all children (or there were no children), and didn't find it
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
return;
}
/****************** writing back to byte[]'s *************************/
public byte[] getNewByteArray() {
byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
fillInBytes(arrayToFill);
return arrayToFill;
}
public void fillInBytes(byte[] arrayToFill) {
for (int i = 0; i < token.getLength(); ++i) {
arrayToFill[tokenStartOffset + i] = token.get(i);
}
if (parent != null) {
parent.fillInBytes(arrayToFill);
}
}
/************************** printing ***********************/
@Override
public String toString() {
String s = "";
if (parent == null) {
s += "R ";
} else {
s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
}
s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
if (numOccurrences > 0) {
s += "x" + numOccurrences;
}
return s;
}
public String getPaddedTokenAndOccurrenceString() {
StringBuilder sb = new StringBuilder();
sb.append(getBnlIndicator(true));
sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
if (outputArrayOffset >= 0) {
sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
}
sb.append(" ");
for (int i = 0; i < tokenStartOffset; ++i) {
sb.append(" ");
}
sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
return sb.toString();
}
public String getBnlIndicator(boolean indent) {
if (indent) {
if (isNub()) {
return " N ";
}
return isBranch() ? "B " : " L";
}
if (isNub()) {
return "N";
}
return isBranch() ? "B" : "L";
}
/********************** count different node types ********************/
public int getNumBranchNodesIncludingThisNode() {
if (isLeaf()) {
return 0;
}
int totalFromThisPlusChildren = isBranch() ? 1 : 0;
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
}
return totalFromThisPlusChildren;
}
public int getNumNubNodesIncludingThisNode() {
if (isLeaf()) {
return 0;
}
int totalFromThisPlusChildren = isNub() ? 1 : 0;
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
}
return totalFromThisPlusChildren;
}
public int getNumLeafNodesIncludingThisNode() {
if (isLeaf()) {
return 1;
}
int totalFromChildren = 0;
for (int i = 0; i < children.size(); ++i) {
TokenizerNode child = children.get(i);
totalFromChildren += child.getNumLeafNodesIncludingThisNode();
}
return totalFromChildren;
}
/*********************** simple read-only methods *******************************/
public int getNodeDepth() {
return nodeDepth;
}
public int getTokenLength() {
return token.getLength();
}
public boolean hasOccurrences() {
return numOccurrences > 0;
}
public boolean isRoot() {
return this.parent == null;
}
public int getNumChildren() {
return CollectionUtils.nullSafeSize(children);
}
public TokenizerNode getLastChild() {
if (CollectionUtils.isEmpty(children)) {
return null;
}
return CollectionUtils.getLast(children);
}
public boolean isLeaf() {
return CollectionUtils.isEmpty(children) && hasOccurrences();
}
public boolean isBranch() {
return CollectionUtils.notEmpty(children) && !hasOccurrences();
}
public boolean isNub() {
return CollectionUtils.notEmpty(children) && hasOccurrences();
}
/********************** simple mutation methods *************************/
/**
* Each occurrence &gt; 1 indicates a repeat of the previous entry.
 * If an external mechanism already knows the entry is a repeat, this can be called directly
 * without going through the repeat-detection process. PtEncoder uses it when adding cells to a
 * row if it knows the new cells are part of the current row.
* @param d increment by this amount
*/
public void incrementNumOccurrences(int d) {
numOccurrences += d;
}
/************************* autogenerated get/set ******************/
public int getTokenOffset() {
return tokenStartOffset;
}
public TokenizerNode getParent() {
return parent;
}
public ByteRange getToken() {
return token;
}
public int getNumOccurrences() {
return numOccurrences;
}
public void setParent(TokenizerNode parent) {
this.parent = parent;
}
public void setNumOccurrences(int numOccurrences) {
this.numOccurrences = numOccurrences;
}
public ArrayList<TokenizerNode> getChildren() {
return children;
}
public long getId() {
return id;
}
public int getFirstInsertionIndex() {
return firstInsertionIndex;
}
public void setFirstInsertionIndex(int firstInsertionIndex) {
this.firstInsertionIndex = firstInsertionIndex;
}
public int getNegativeIndex() {
return negativeIndex;
}
public void setNegativeIndex(int negativeIndex) {
this.negativeIndex = negativeIndex;
}
public int getOutputArrayOffset() {
return outputArrayOffset;
}
public void setOutputArrayOffset(int outputArrayOffset) {
this.outputArrayOffset = outputArrayOffset;
}
public void setId(long id) {
this.id = id;
}
public void setBuilder(Tokenizer builder) {
this.builder = builder;
}
public void setTokenOffset(int tokenOffset) {
this.tokenStartOffset = tokenOffset;
}
public void setToken(ByteRange token) {
this.token = token;
}
}
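
A short sketch of the split(..) example above as seen through the public API; the class name is invented, and the BAA/BOO inputs come straight from the javadoc of split(..).

import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;

public class TrieSplitSketch {
  public static void main(String[] args) {
    Tokenizer tokenizer = new Tokenizer();
    tokenizer.addSorted(new SimpleMutableByteRange(Bytes.toBytes("BAA")));
    tokenizer.addSorted(new SimpleMutableByteRange(Bytes.toBytes("BOO")));
    // adding BOO splits the original leaf BAA into branch "B" with leaf children "AA" and "OO";
    // getArrays() walks the trie and rebuilds each input from its leaf plus its parents' tokens
    List<byte[]> roundTripped = tokenizer.getArrays();
    for (byte[] row : roundTripped) {
      System.out.println(Bytes.toString(row)); // BAA, then BOO
    }
  }
}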

View File

@ -1,38 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Warning: currently unused, but code is valid. Pending performance testing on more data sets.
*
 * Indicates where the key is relative to our current position in the tree. For example, the
 * current tree node is "BEFORE" the key we are seeking.
*/
@InterfaceAudience.Private
public enum TokenizerRowSearchPosition {
AFTER,//the key is after this tree node, so keep searching
BEFORE,//in a binary search, this tells us to back up
MATCH,//the current node is a full match
NO_MATCH,//might as well return a value more informative than null
}

View File

@ -1,73 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
import org.apache.yetus.audience.InterfaceAudience;
/**
 * For recursively searching a PtBuilder.
*/
@InterfaceAudience.Private
public class TokenizerRowSearchResult{
/************ fields ************************/
protected TokenizerRowSearchPosition difference;
protected TokenizerNode matchingNode;
/*************** construct *****************/
public TokenizerRowSearchResult() {
}
public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) {
this.difference = difference;
}
public TokenizerRowSearchResult(TokenizerNode matchingNode) {
this.difference = TokenizerRowSearchPosition.MATCH;
this.matchingNode = matchingNode;
}
/*************** methods **********************/
public boolean isMatch() {
return TokenizerRowSearchPosition.MATCH == difference;
}
/************* get/set ***************************/
public TokenizerRowSearchPosition getDifference() {
return difference;
}
public TokenizerNode getMatchingNode() {
return matchingNode;
}
public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) {
this.difference = difference;
this.matchingNode = matchingNode;
}
}

View File

@ -1,67 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.scanner;
import org.apache.yetus.audience.InterfaceAudience;
/**
* An indicator of the state of the scanner after an operation such as nextCell() or
* positionAt(..). For example:
* <ul>
* <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
* it should load the next block.</li>
* <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.
* </li>
* <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
* next region.</li>
* </ul>
*/
@InterfaceAudience.Private
public enum CellScannerPosition {
/**
* getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first
* cell.
*/
BEFORE_FIRST,
/**
* getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
* rather it is the nearest cell before the requested cell.
*/
BEFORE,
/**
* getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by
* positionAt(..).
*/
AT,
/**
* getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
* rather it is the nearest cell after the requested cell.
*/
AFTER,
/**
* getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect.
*/
AFTER_LAST
}

View File

@ -1,118 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.scanner;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
/**
* Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that
* the implementation is able to navigate between cells without iterating through every cell.
*/
@InterfaceAudience.Private
public interface CellSearcher extends ReversibleCellScanner {
/**
* Reset any state in the scanner so it appears it was freshly opened.
*/
void resetToBeforeFirstEntry();
/**
* <p>
* Do everything within this scanner's power to find the key. Look forward and backwards.
* </p>
* <p>
* Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state.
* </p>
* @param key position the CellScanner exactly on this key
* @return true if the cell existed and getCurrentCell() holds a valid cell
*/
boolean positionAt(Cell key);
/**
* <p>
* Same as positionAt(..), but go to the extra effort of finding the previous key if there's no
* exact match.
* </p>
* @param key position the CellScanner on this key or the closest cell before
* @return AT if exact match<br>
* BEFORE if on last cell before key<br>
* BEFORE_FIRST if key was before the first cell in this scanner's scope
*/
CellScannerPosition positionAtOrBefore(Cell key);
/**
* <p>
* Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact
* match.
* </p>
* @param key position the CellScanner on this key or the closest cell after
* @return AT if exact match<br>
* AFTER if on first cell after key<br>
* AFTER_LAST if key was after the last cell in this scanner's scope
*/
CellScannerPosition positionAtOrAfter(Cell key);
/**
* <p>
* Note: Added for backwards compatibility with
* org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek(Cell)
* </p><p>
* Look for the key, but only look after the current position. Probably not needed for an
* efficient tree implementation, but is important for implementations without random access such
* as unencoded KeyValue blocks.
* </p>
* @param key position the CellScanner exactly on this key
* @return true if getCurrent() holds a valid cell
*/
boolean seekForwardTo(Cell key);
/**
* <p>
 * Same as seekForwardTo(..), but go to the extra effort of finding the previous key if there's no
* exact match.
* </p>
* @param key
* @return AT if exact match<br>
 *         BEFORE if on last cell before key<br>
* AFTER_LAST if key was after the last cell in this scanner's scope
*/
CellScannerPosition seekForwardToOrBefore(Cell key);
/**
* <p>
* Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no
* exact match.
* </p>
* @param key
* @return AT if exact match<br>
* AFTER if on first cell after key<br>
* AFTER_LAST if key was after the last cell in this scanner's scope
*/
CellScannerPosition seekForwardToOrAfter(Cell key);
/**
* <p>
* Note: This may not be appropriate to have in the interface. Need to investigate.
* </p>
* Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST.
* This is used by tests and for handling certain edge cases.
*/
void positionAfterLastCell();
}
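
A hedged sketch of the call pattern the interface above implies. The helper and class names are invented, 'searcher' stands for any CellSearcher implementation (the PrefixTree block searcher was the main one), and current() comes from the inherited CellScanner interface.

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;

public class CellSearcherUsageSketch {
  // Position 'searcher' on 'key' or the nearest following cell; null if key is past the end.
  static Cell seekAtOrAfter(CellSearcher searcher, Cell key) {
    CellScannerPosition pos = searcher.positionAtOrAfter(key);
    if (pos == CellScannerPosition.AT || pos == CellScannerPosition.AFTER) {
      return searcher.current(); // the cell the searcher stopped on
    }
    return null; // AFTER_LAST: key sorts after every cell in this searcher's scope
  }
}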

View File

@ -1,55 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.scanner;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.CellScanner;
/**
* An extension of CellScanner indicating the scanner supports iterating backwards through cells.
* <p>
* Note: This was not added to suggest that HBase should support client facing reverse Scanners,
* but
* because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing
* up if the positionAt(..) method goes past the requested cell.
*/
@InterfaceAudience.Private
public interface ReversibleCellScanner extends CellScanner {
/**
* Try to position the scanner one Cell before the current position.
* @return true if the operation was successful, meaning getCurrentCell() will return a valid
* Cell.<br>
* false if there were no previous cells, meaning getCurrentCell() will return null.
* Scanner position will be
* {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
*/
boolean previous();
/**
* Try to position the scanner in the row before the current row.
* @param endOfRow true for the last cell in the previous row; false for the first cell
* @return true if the operation was successful, meaning getCurrentCell() will return a valid
* Cell.<br>
* false if there were no previous cells, meaning getCurrentCell() will return null.
* Scanner position will be
* {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
*/
boolean previousRow(boolean endOfRow);
}

View File

@ -1,181 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.byterange;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/**
 * Performance-oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
* order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
* <p>
* Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and
* {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
 * trie-oriented ByteRangeTrieSet, etc.
*/
@InterfaceAudience.Private
public abstract class ByteRangeSet {
/******************** fields **********************/
protected byte[] byteAppender;
protected int numBytes;
protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;
protected ArrayList<ByteRange> uniqueRanges;
protected int numUniqueRanges = 0;
protected int[] uniqueRangeIndexByInsertionId;
protected int numInputs;
protected List<Integer> sortedIndexByUniqueIndex;
protected int[] sortedIndexByInsertionId;
protected ArrayList<ByteRange> sortedRanges;
/****************** construct **********************/
protected ByteRangeSet() {
this.byteAppender = new byte[0];
this.uniqueRanges = Lists.newArrayList();
this.uniqueRangeIndexByInsertionId = new int[0];
this.sortedIndexByUniqueIndex = Lists.newArrayList();
this.sortedIndexByInsertionId = new int[0];
this.sortedRanges = Lists.newArrayList();
}
public void reset() {
numBytes = 0;
uniqueIndexByUniqueRange.clear();
numUniqueRanges = 0;
numInputs = 0;
sortedIndexByUniqueIndex.clear();
sortedRanges.clear();
}
/*************** abstract *************************/
public abstract void addToSortedRanges();
/**************** methods *************************/
/**
* Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and
* insert it into the tracking Map uniqueIndexByUniqueRange.
*/
public void add(ByteRange bytes) {
Integer index = uniqueIndexByUniqueRange.get(bytes);
if (index == null) {
index = store(bytes);
}
int minLength = numInputs + 1;
uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
minLength, 2 * minLength);
uniqueRangeIndexByInsertionId[numInputs] = index;
++numInputs;
}
protected int store(ByteRange bytes) {
int indexOfNewElement = numUniqueRanges;
if (uniqueRanges.size() <= numUniqueRanges) {
uniqueRanges.add(new SimpleMutableByteRange());
}
ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
int neededBytes = numBytes + bytes.getLength();
byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
bytes.deepCopyTo(byteAppender, numBytes);
storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
numBytes += bytes.getLength();
uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
int newestUniqueIndex = numUniqueRanges;
++numUniqueRanges;
return newestUniqueIndex;
}
public ByteRangeSet compile() {
addToSortedRanges();
for (int i = 0; i < sortedRanges.size(); ++i) {
sortedIndexByUniqueIndex.add(null);// need to grow the size
}
// TODO move this to an invert(int[]) util method
for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
sortedIndexByUniqueIndex.set(uniqueIndex, i);
}
sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
numInputs);
for (int i = 0; i < numInputs; ++i) {
int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
sortedIndexByInsertionId[i] = sortedIndex;
}
return this;
}
public int getSortedIndexForInsertionId(int insertionId) {
return sortedIndexByInsertionId[insertionId];
}
public int size() {
return uniqueIndexByUniqueRange.size();
}
/***************** standard methods ************************/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
int i = 0;
for (ByteRange r : sortedRanges) {
if (i > 0) {
sb.append("\n");
}
sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
++i;
}
sb.append("\ntotalSize:" + numBytes);
sb.append("\navgSize:" + getAvgSize());
return sb.toString();
}
/**************** get/set *****************************/
public ArrayList<ByteRange> getSortedRanges() {
return sortedRanges;
}
public long getAvgSize() {
return numBytes / numUniqueRanges;
}
}

View File

@ -1,57 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.byterange.impl;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
/**
* This is probably the best implementation of ByteRangeSet at the moment, though a HashMap produces
* garbage when adding a new element to it. We can probably create a tighter implementation without
* pointers or garbage.
*/
@InterfaceAudience.Private
public class ByteRangeHashSet extends ByteRangeSet {
/************************ constructors *****************************/
public ByteRangeHashSet() {
this.uniqueIndexByUniqueRange = new HashMap<>();
}
public ByteRangeHashSet(List<ByteRange> rawByteArrays) {
for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
add(in);
}
}
@Override
public void addToSortedRanges() {
sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
Collections.sort(sortedRanges);
}
}
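
A minimal usage sketch for the ByteRangeSet family via the hash implementation above; the class name and qualifier strings are illustrative only.

import java.util.List;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet;

public class ByteRangeSetUsageSketch {
  public static void main(String[] args) {
    ByteRangeSet qualifiers = new ByteRangeHashSet();
    // values arrive in insertion order, unsorted and possibly duplicated
    for (String q : new String[] { "q2", "q1", "q2", "q3" }) {
      qualifiers.add(new SimpleMutableByteRange(Bytes.toBytes(q)));
    }
    qualifiers.compile(); // de-dupe, sort, and build the insertionId -> sortedIndex lookup
    List<ByteRange> sorted = qualifiers.getSortedRanges();  // q1, q2, q3
    int firstInputSortedIndex = qualifiers.getSortedIndexForInsertionId(0); // 1, i.e. "q2"
    System.out.println(sorted.size() + " unique, first input sorts at " + firstInputSortedIndex);
  }
}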

View File

@ -1,54 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.byterange.impl;
import java.util.List;
import java.util.TreeMap;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
/**
* Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet.
*/
@InterfaceAudience.Private
public class ByteRangeTreeSet extends ByteRangeSet {
/************************ constructors *****************************/
public ByteRangeTreeSet() {
this.uniqueIndexByUniqueRange = new TreeMap<>();
}
public ByteRangeTreeSet(List<ByteRange> rawByteArrays) {
    this();//needed to initialize the TreeMap
for(ByteRange in : IterableUtils.nullSafe(rawByteArrays)){
add(in);
}
}
@Override
public void addToSortedRanges() {
sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
}
}

View File

@ -1,117 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.vint;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
/**
* UFInt is an abbreviation for Unsigned Fixed-width Integer.
*
* This class converts between positive ints and 1-4 bytes that represent the int. All input ints
* must be positive. Max values stored in N bytes are:
*
 * N=1: 2^8 - 1 =&gt; 255
 * N=2: 2^16 - 1 =&gt; 65,535
 * N=3: 2^24 - 1 =&gt; 16,777,215
 * N=4: 2^31 - 1 =&gt; 2,147,483,647 (Integer.MAX_VALUE)
*
* This was created to get most of the memory savings of a variable length integer when encoding
* an array of input integers, but to fix the number of bytes for each integer to the number needed
* to store the maximum integer in the array. This enables a binary search to be performed on the
* array of encoded integers.
*
* PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if
* the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
* numbers will also require 2 bytes.
*
* warnings:
* * no input validation for max performance
* * no negatives
*/
@InterfaceAudience.Private
public class UFIntTool {
private static final int NUM_BITS_IN_LONG = 64;
public static long maxValueForNumBytes(int numBytes) {
return (1L << (numBytes * 8)) - 1;
}
public static int numBytes(final long value) {
if (value == 0) {// 0 doesn't work with the formula below
return 1;
}
return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8;
}
public static byte[] getBytes(int outputWidth, final long value) {
byte[] bytes = new byte[outputWidth];
writeBytes(outputWidth, value, bytes, 0);
return bytes;
}
public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
bytes[offset + outputWidth - 1] = (byte) value;
for (int i = outputWidth - 2; i >= 0; --i) {
bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8);
}
}
private static final long[] MASKS = new long[] {
(long) 255,
(long) 255 << 8,
(long) 255 << 16,
(long) 255 << 24,
(long) 255 << 32,
(long) 255 << 40,
(long) 255 << 48,
(long) 255 << 56
};
public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException {
for (int i = outputWidth - 1; i >= 0; --i) {
os.write((byte) ((value & MASKS[i]) >>> (8 * i)));
}
}
public static long fromBytes(final byte[] bytes) {
long value = 0;
value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int
for (int i = 1; i < bytes.length; ++i) {
value <<= 8;
value |= bytes[i] & 0xff;
}
return value;
}
public static long fromBytes(final ByteBuff buf, final int offset, final int width) {
long value = 0;
value |= buf.get(offset + 0) & 0xff;// these seem to do ok without casting the byte to int
for (int i = 1; i < width; ++i) {
value <<= 8;
value |= buf.get(i + offset) & 0xff;
}
return value;
}
}
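
A worked example of the fixed-width encoding described in the class comment above; the wrapper class name and the value 500 are illustrative.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.hbase.util.vint.UFIntTool;

public class UFIntToolUsageSketch {
  public static void main(String[] args) throws IOException {
    // the width is chosen once per array, from the largest value that has to fit
    int width = UFIntTool.numBytes(500);              // 2, since 500 needs two bytes
    long max = UFIntTool.maxValueForNumBytes(width);  // 65535

    byte[] fixed = UFIntTool.getBytes(width, 500);    // {0x01, 0xF4}, big-endian
    long roundTrip = UFIntTool.fromBytes(fixed);      // 500

    // the OutputStream variant is what RowNodeWriter uses when appending offsets and indexes
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    UFIntTool.writeBytes(width, 500, os);             // writes the same two bytes
    System.out.println(roundTrip + " fits under " + max);
  }
}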

View File

@ -1,112 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.vint;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
/**
* Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left
* bit of the current byte is 1, then there is at least one more byte.
*/
@InterfaceAudience.Private
public class UVIntTool {
public static final byte
BYTE_7_RIGHT_BITS_SET = 127,
BYTE_LEFT_BIT_SET = -128;
public static final long
INT_7_RIGHT_BITS_SET = 127,
INT_8TH_BIT_SET = 128;
public static final byte[]
MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 };
/********************* int -&gt; bytes **************************/
public static int numBytes(int in) {
if (in == 0) {
// doesn't work with the formula below
return 1;
}
return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1)
}
public static byte[] getBytes(int value) {
int numBytes = numBytes(value);
byte[] bytes = new byte[numBytes];
int remainder = value;
for (int i = 0; i < numBytes - 1; ++i) {
// set the left bit
bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET);
remainder >>= 7;
}
// do not set the left bit
bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET);
return bytes;
}
public static int writeBytes(int value, OutputStream os) throws IOException {
int numBytes = numBytes(value);
int remainder = value;
for (int i = 0; i < numBytes - 1; ++i) {
// set the left bit
os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET));
remainder >>= 7;
}
// do not set the left bit
os.write((byte) (remainder & INT_7_RIGHT_BITS_SET));
return numBytes;
}
/******************** bytes -> int **************************/
public static int getInt(ByteBuff buffer, int offset) {
int value = 0;
for (int i = 0;; ++i) {
byte b = buffer.get(offset + i);
int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
shifted <<= 7 * i;
value |= shifted;
if (b >= 0) {
break;
}
}
return value;
}
public static int getInt(InputStream is) throws IOException {
int value = 0;
int i = 0;
int b;
do{
b = is.read();
int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
shifted <<= 7 * i;
value |= shifted;
++i;
}while(b > Byte.MAX_VALUE);
return value;
}
}
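
A short sketch of the removed UVIntTool varint format (hypothetical example class; the value 300 is arbitrary). The low-order 7 bits are written first, and the high bit is set on every byte except the last:

import java.io.ByteArrayInputStream;

import org.apache.hadoop.hbase.util.vint.UVIntTool;

public class UVIntToolExample {
  public static void main(String[] args) throws Exception {
    int value = 300;                             // arbitrary example value
    byte[] encoded = UVIntTool.getBytes(value);  // {(byte) 0xAC, 0x02}: 300 = 44 + (2 << 7)
    int decoded = UVIntTool.getInt(new ByteArrayInputStream(encoded));
    System.out.println(encoded.length + " " + decoded);  // 2 300
  }
}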

View File

@@ -1,116 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.vint;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
/**
 * Simple variable-length integer encoding. A leftmost bit of 0 means the current byte is the last
 * byte; a leftmost bit of 1 means at least one more byte follows.
*/
@InterfaceAudience.Private
public class UVLongTool{
public static final byte
BYTE_7_RIGHT_BITS_SET = 127,
BYTE_LEFT_BIT_SET = -128;
public static final long
LONG_7_RIGHT_BITS_SET = 127,
LONG_8TH_BIT_SET = 128;
public static final byte[]
MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 };
/********************* long -> bytes **************************/
public static int numBytes(long in) {// no validation here; callers must not pass negative values
if (in == 0) {// 0 doesn't work with the formula below
return 1;
}
return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1)
}
public static byte[] getBytes(long value) {
int numBytes = numBytes(value);
byte[] bytes = new byte[numBytes];
long remainder = value;
for (int i = 0; i < numBytes - 1; ++i) {
bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit
remainder >>= 7;
}
bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit
return bytes;
}
public static int writeBytes(long value, OutputStream os) throws IOException {
int numBytes = numBytes(value);
long remainder = value;
for (int i = 0; i < numBytes - 1; ++i) {
// set the left bit
os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET));
remainder >>= 7;
}
// do not set the left bit
os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET));
return numBytes;
}
/******************** bytes -> long **************************/
public static long getLong(byte[] bytes) {
return getLong(new SingleByteBuff(ByteBuffer.wrap(bytes)), 0);
}
public static long getLong(ByteBuff buf, int offset) {
long value = 0;
for (int i = 0;; ++i) {
byte b = buf.get(offset + i);
long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
shifted <<= 7 * i;
value |= shifted;
if (b >= 0) {
break;
}// first bit was 0, so that's the last byte in the VarLong
}
return value;
}
public static long getLong(InputStream is) throws IOException {
long value = 0;
int i = 0;
int b;
do {
b = is.read();
long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
shifted <<= 7 * i;
value |= shifted;
++i;
} while (b > Byte.MAX_VALUE);
return value;
}
}
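
The same scheme applied to longs, sketched as a stream round-trip with the removed UVLongTool (hypothetical example class; the timestamp reuses the minTimestamp constant that appears in TestBlockMeta further down):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.hbase.util.vint.UVLongTool;

public class UVLongToolExample {
  public static void main(String[] args) throws Exception {
    long timestamp = 1318966363481L;                      // 41 significant bits
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    int written = UVLongTool.writeBytes(timestamp, os);   // 6 bytes: 41 bits at 7 payload bits per byte
    long decoded = UVLongTool.getLong(new ByteArrayInputStream(os.toByteArray()));
    System.out.println(written + " " + (decoded == timestamp));  // 6 true
  }
}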

View File

@@ -1,65 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.keyvalue;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.row.TestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import static org.junit.Assume.assumeFalse;
@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestKeyValueTool {
@Parameters
public static Collection<Object[]> parameters() {
return TestRowData.InMemory.getAllAsObjectArray();
}
@Parameterized.Parameter
public TestRowData rows;
@Test
public void testRoundTripToBytes() {
assumeFalse(rows instanceof TestRowDataTrivialWithTags);
assumeFalse(rows instanceof TestRowDataRandomKeyValuesWithTags);
List<KeyValue> kvs = rows.getInputs();
ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false);
List<KeyValue> roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false, false);
Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray());
}
}

View File

@@ -1,27 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree;
import org.apache.hadoop.hbase.util.Bytes;
public class PrefixTreeTestConstants {
public static final byte[] TEST_CF = Bytes.toBytes("cfDefault");
}

View File

@@ -1,91 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.blockmeta;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({MiscTests.class, SmallTests.class})
public class TestBlockMeta {
static int BLOCK_START = 123;
private static PrefixTreeBlockMeta createSample() {
PrefixTreeBlockMeta m = new PrefixTreeBlockMeta();
m.setNumMetaBytes(0);
m.setNumKeyValueBytes(3195);
m.setNumRowBytes(0);
m.setNumFamilyBytes(3);
m.setNumQualifierBytes(12345);
m.setNumTagsBytes(50);
m.setNumTimestampBytes(23456);
m.setNumMvccVersionBytes(5);
m.setNumValueBytes(34567);
m.setNextNodeOffsetWidth(3);
m.setFamilyOffsetWidth(1);
m.setQualifierOffsetWidth(2);
m.setTagsOffsetWidth(2);
m.setTimestampIndexWidth(1);
m.setMvccVersionIndexWidth(2);
m.setValueOffsetWidth(8);
m.setValueLengthWidth(3);
m.setRowTreeDepth(11);
m.setMaxRowLength(200);
m.setMaxQualifierLength(50);
m.setMaxTagsLength(40);
m.setMinTimestamp(1318966363481L);
m.setTimestampDeltaWidth(3);
m.setMinMvccVersion(100L);
m.setMvccVersionDeltaWidth(4);
m.setAllSameType(false);
m.setAllTypes(KeyValue.Type.Delete.getCode());
m.setNumUniqueRows(88);
m.setNumUniqueFamilies(1);
m.setNumUniqueQualifiers(56);
m.setNumUniqueTags(5);
return m;
}
@Test
public void testStreamSerialization() throws IOException {
PrefixTreeBlockMeta original = createSample();
ByteArrayOutputStream os = new ByteArrayOutputStream(10000);
original.writeVariableBytesToOutputStream(os);
ByteBuffer buffer = ByteBuffer.wrap(os.toByteArray());
PrefixTreeBlockMeta roundTripped = new PrefixTreeBlockMeta(new SingleByteBuff(buffer));
Assert.assertTrue(original.equals(roundTripped));
}
}

View File

@@ -1,78 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.builder;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@Category({MiscTests.class,SmallTests.class})
@RunWith(Parameterized.class)
public class TestTokenizer {
@Parameters
public static Collection<Object[]> parameters() {
return new TestTokenizerData.InMemory().getAllAsObjectArray();
}
private List<byte[]> inputs;
private Tokenizer builder;
private List<byte[]> roundTripped;
public TestTokenizer(TestTokenizerData sortedByteArrays) {
this.inputs = sortedByteArrays.getInputs();
this.builder = new Tokenizer();
for (byte[] array : inputs) {
builder.addSorted(new SimpleMutableByteRange(array));
}
this.roundTripped = builder.getArrays();
}
@Test
public void testReaderRoundTrip() {
Assert.assertEquals(inputs.size(), roundTripped.size());
Assert.assertTrue(Bytes.isSorted(roundTripped));
Assert.assertTrue(Bytes.equals(inputs, roundTripped));
}
@Test
public void testSearching() {
for (byte[] input : inputs) {
TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult();
builder.getNode(resultHolder, input, 0, input.length);
TokenizerNode n = resultHolder.getMatchingNode();
byte[] output = n.getNewByteArray();
Assert.assertTrue(Bytes.equals(input, output));
}
}
}

View File

@@ -1,42 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.builder;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.builder.data.TestTokenizerDataBasic;
import org.apache.hadoop.hbase.codec.prefixtree.builder.data.TestTokenizerDataEdgeCase;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public interface TestTokenizerData {
List<byte[]> getInputs();
List<byte[]> getOutputs();
class InMemory {
public Collection<Object[]> getAllAsObjectArray() {
List<Object[]> all = Lists.newArrayList();
all.add(new Object[] { new TestTokenizerDataBasic() });
all.add(new Object[] { new TestTokenizerDataEdgeCase() });
return all;
}
}
}

View File

@@ -1,90 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.builder;
import java.util.List;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
@Category({MiscTests.class, SmallTests.class})
public class TestTreeDepth {
@Test
public void testSingleNode() {
List<String> inputs = Lists.newArrayList("a");
testInternal(inputs, 1);
}
@Test
public void testSimpleBranch() {
List<String> inputs = Lists.newArrayList("a", "aa", "ab");
testInternal(inputs, 2);
}
@Test
public void testEmptyRoot() {
List<String> inputs = Lists.newArrayList("a", "b");
testInternal(inputs, 2);
}
@Test
public void testRootAsNub() {
List<String> inputs = Lists.newArrayList("a", "aa");
testInternal(inputs, 2);
}
@Test
public void testRootAsNubPlusNub() {
List<String> inputs = Lists.newArrayList("a", "aa", "aaa");
testInternal(inputs, 3);
}
@Test
public void testEmptyRootPlusNub() {
List<String> inputs = Lists.newArrayList("a", "aa", "b");
testInternal(inputs, 3);
}
@Test
public void testSplitDistantAncestor() {
List<String> inputs = Lists.newArrayList("a", "ac", "acd", "b");
testInternal(inputs, 4);
}
protected void testInternal(List<String> inputs, int expectedTreeDepth) {
Tokenizer builder = new Tokenizer();
for (String s : inputs) {
SimpleMutableByteRange b = new SimpleMutableByteRange(Bytes.toBytes(s));
builder.addSorted(b);
}
Assert.assertEquals(1, builder.getRoot().getNodeDepth());
Assert.assertEquals(expectedTreeDepth, builder.getTreeDepth());
}
}

View File

@@ -1,51 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.builder.data;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.builder.TestTokenizerData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestTokenizerDataBasic implements TestTokenizerData {
static List<byte[]> d = Lists.newArrayList();
static {
List<String> s = Lists.newArrayList();
s.add("abc");// nub
s.add("abcde");// leaf
s.add("bbc");// causes root to split and have empty token
s.add("bbc");// makes numOccurrences=2 on the bbc node
s.add("cd");// just to get another node after the numOccurrences=2
d = Bytes.getUtf8ByteArrays(s);
}
@Override
public List<byte[]> getInputs() {
return d;
}
@Override
public List<byte[]> getOutputs() {
return d;
}
}

View File

@@ -1,53 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.builder.data;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.builder.TestTokenizerData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestTokenizerDataEdgeCase implements TestTokenizerData {
static List<byte[]> d = Lists.newArrayList();
static {
/*
* tricky little combination because the acegi token will partially match abdfi, but when you
* descend into abdfi, it will not fully match
*/
List<String> s = Lists.newArrayList();
s.add("abdfh");
s.add("abdfi");
s.add("acegi");
d = Bytes.getUtf8ByteArrays(s);
}
@Override
public List<byte[]> getInputs() {
return d;
}
@Override
public List<byte[]> getOutputs() {
return d;
}
}

View File

@@ -1,127 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.column;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestColumnBuilder {
@Parameters
public static Collection<Object[]> parameters() {
return new TestColumnData.InMemory().getAllAsObjectArray();
}
/*********** fields **********************************/
protected TestColumnData columns;
protected ByteRangeTreeSet columnSorter;
protected List<ByteRange> sortedUniqueColumns;
protected PrefixTreeBlockMeta blockMeta;
protected Tokenizer builder;
protected ColumnSectionWriter writer;
protected byte[] bytes;
protected byte[] buffer;
protected ColumnReader reader;
/*************** construct ****************************/
public TestColumnBuilder(TestColumnData columns) {
this.columns = columns;
List<ByteRange> inputs = columns.getInputs();
this.columnSorter = new ByteRangeTreeSet(inputs);
this.sortedUniqueColumns = columnSorter.compile().getSortedRanges();
List<byte[]> copies = ByteRangeUtils.copyToNewArrays(sortedUniqueColumns);
Assert.assertTrue(Bytes.isSorted(copies));
this.blockMeta = new PrefixTreeBlockMeta();
this.blockMeta.setNumMetaBytes(0);
this.blockMeta.setNumRowBytes(0);
this.builder = new Tokenizer();
}
/************* methods ********************************/
@Test
public void testReaderRoundTrip() throws IOException {
for (int i = 0; i < sortedUniqueColumns.size(); ++i) {
ByteRange column = sortedUniqueColumns.get(i);
builder.addSorted(column);
}
List<byte[]> builderOutputArrays = builder.getArrays();
for (int i = 0; i < builderOutputArrays.size(); ++i) {
byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray();
byte[] outputArray = builderOutputArrays.get(i);
boolean same = Bytes.equals(inputArray, outputArray);
Assert.assertTrue(same);
}
Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size());
writer = new ColumnSectionWriter(blockMeta, builder, ColumnNodeType.QUALIFIER);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
writer.compile().writeBytes(baos);
bytes = baos.toByteArray();
buffer = new byte[blockMeta.getMaxQualifierLength()];
reader = new ColumnReader(buffer, ColumnNodeType.QUALIFIER);
reader.initOnBlock(blockMeta, new SingleByteBuff(ByteBuffer.wrap(bytes)));
List<TokenizerNode> builderNodes = Lists.newArrayList();
builder.appendNodes(builderNodes, true, true);
int i = 0;
for (TokenizerNode builderNode : builderNodes) {
if (!builderNode.hasOccurrences()) {
continue;
}
Assert.assertEquals(1, builderNode.getNumOccurrences());// we de-duped before adding to
// builder
int position = builderNode.getOutputArrayOffset();
byte[] output = reader.populateBuffer(position).copyBufferToNewArray();
boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output);
Assert.assertTrue(same);
++i;
}
}
}

View File

@@ -1,45 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.column;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.column.data.TestColumnDataRandom;
import org.apache.hadoop.hbase.codec.prefixtree.column.data.TestColumnDataSimple;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public interface TestColumnData {
List<ByteRange> getInputs();
List<ByteRange> getOutputs();
class InMemory {
public Collection<Object[]> getAllAsObjectArray() {
List<Object[]> all = Lists.newArrayList();
all.add(new Object[] { new TestColumnDataSimple() });
for (int leftShift = 0; leftShift < 16; ++leftShift) {
all.add(new Object[] { new TestColumnDataRandom(1 << leftShift) });
}
return all;
}
}
}

View File

@@ -1,63 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.column.data;
import java.util.List;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.column.TestColumnData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.apache.hadoop.hbase.util.RedundantKVGenerator;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestColumnDataRandom implements TestColumnData {
private List<ByteRange> inputs = Lists.newArrayList();
private List<ByteRange> outputs = Lists.newArrayList();
public TestColumnDataRandom(int numColumns) {
RedundantKVGenerator generator = new RedundantKVGenerator();
ByteRangeSet sortedColumns = new ByteRangeTreeSet();
List<KeyValue> d = generator.generateTestKeyValues(numColumns);
for (KeyValue col : d) {
ByteRange colRange = new SimpleMutableByteRange(CellUtil.cloneQualifier(col));
inputs.add(colRange);
sortedColumns.add(colRange);
}
for (ByteRange col : sortedColumns.compile().getSortedRanges()) {
outputs.add(col);
}
}
@Override
public List<ByteRange> getInputs() {
return inputs;
}
@Override
public List<ByteRange> getOutputs() {
return outputs;
}
}

View File

@@ -1,52 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.column.data;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.column.TestColumnData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestColumnDataSimple implements TestColumnData {
@Override
public List<ByteRange> getInputs() {
List<String> d = Lists.newArrayList();
d.add("abc");
d.add("abcde");
d.add("abc");
d.add("bbc");
d.add("abc");
return ByteRangeUtils.fromArrays(Bytes.getUtf8ByteArrays(d));
}
@Override
public List<ByteRange> getOutputs() {
List<String> d = Lists.newArrayList();
d.add("abc");
d.add("abcde");
d.add("bbc");
return ByteRangeUtils.fromArrays(Bytes.getUtf8ByteArrays(d));
}
}

View File

@@ -1,54 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row;
import java.util.List;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public abstract class BaseTestRowData implements TestRowData {
@Override
public List<Integer> getRowStartIndexes() {
List<Integer> rowStartIndexes = Lists.newArrayList();
rowStartIndexes.add(0);
List<KeyValue> inputs = getInputs();
for (int i = 1; i < inputs.size(); ++i) {
KeyValue lastKv = inputs.get(i - 1);
KeyValue kv = inputs.get(i);
if (!CellUtil.matchingRows(lastKv, kv)) {
rowStartIndexes.add(i);
}
}
return rowStartIndexes;
}
@Override
public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
}
@Override
public void individualSearcherAssertions(CellSearcher searcher) {
}
}

View File

@@ -1,229 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearchWithPrefix;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestPrefixTreeSearcher {
protected static int BLOCK_START = 7;
@Parameters
public static Collection<Object[]> parameters() {
return TestRowData.InMemory.getAllAsObjectArray();
}
protected TestRowData rows;
protected ByteBuff block;
public TestPrefixTreeSearcher(TestRowData testRows) throws IOException {
this.rows = testRows;
ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
PrefixTreeEncoder kvBuilder = new PrefixTreeEncoder(os, true);
for (KeyValue kv : rows.getInputs()) {
kvBuilder.write(kv);
}
kvBuilder.flush();
byte[] outputBytes = os.toByteArray();
ByteBuffer out = ByteBuffer.allocateDirect(outputBytes.length);
ByteBufferUtils.copyFromArrayToBuffer(out, outputBytes, 0, outputBytes.length);
out.position(0);
this.block = new SingleByteBuff(out);
}
@Test
public void testScanForwards() throws IOException {
CellSearcher searcher = null;
try {
searcher = DecoderFactory.checkOut(block, true);
int i = -1;
while (searcher.advance()) {
++i;
KeyValue inputCell = rows.getInputs().get(i);
Cell outputCell = searcher.current();
// check all 3 permutations of equals()
Assert.assertEquals(inputCell, outputCell);
Assert.assertEquals(outputCell, inputCell);
Assert.assertTrue(CellUtil.equals(inputCell, outputCell));
}
Assert.assertEquals(rows.getInputs().size(), i + 1);
} finally {
DecoderFactory.checkIn(searcher);
}
}
@Test
public void testScanBackwards() throws IOException {
CellSearcher searcher = null;
try {
searcher = DecoderFactory.checkOut(block, true);
searcher.positionAfterLastCell();
int i = -1;
while (searcher.previous()) {
++i;
int oppositeIndex = rows.getInputs().size() - i - 1;
KeyValue inputKv = rows.getInputs().get(oppositeIndex);
KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
Assert.assertEquals(inputKv, outputKv);
}
Assert.assertEquals(rows.getInputs().size(), i + 1);
} finally {
DecoderFactory.checkIn(searcher);
}
}
@Test
public void testRandomSeekHits() throws IOException {
CellSearcher searcher = null;
try {
searcher = DecoderFactory.checkOut(block, true);
for (KeyValue kv : rows.getInputs()) {
boolean hit = searcher.positionAt(kv);
Assert.assertTrue(hit);
Cell foundKv = searcher.current();
Assert.assertTrue(CellUtil.equals(kv, foundKv));
}
} finally {
DecoderFactory.checkIn(searcher);
}
}
@Test
public void testRandomSeekMisses() throws IOException {
CellSearcher searcher = null;
List<Integer> rowStartIndexes = rows.getRowStartIndexes();
try {
searcher = DecoderFactory.checkOut(block, true);
//test both the positionAtOrBefore and positionAtOrAfter methods
for(boolean beforeVsAfterOnMiss : new boolean[]{true, false}){
for (int i=0; i < rows.getInputs().size(); ++i) {
KeyValue kv = rows.getInputs().get(i);
//nextRow
Cell inputNextRow = PrivateCellUtil.createFirstOnNextRow(kv);
CellScannerPosition position = beforeVsAfterOnMiss
? searcher.positionAtOrBefore(inputNextRow)
: searcher.positionAtOrAfter(inputNextRow);
boolean isFirstInRow = rowStartIndexes.contains(i);
if(isFirstInRow){
int rowIndex = rowStartIndexes.indexOf(i);
if(rowIndex < rowStartIndexes.size() - 1){
if(beforeVsAfterOnMiss){
Assert.assertEquals(CellScannerPosition.BEFORE, position);
}else{
Assert.assertEquals(CellScannerPosition.AFTER, position);
}
int expectedInputIndex = beforeVsAfterOnMiss
? rowStartIndexes.get(rowIndex + 1) - 1
: rowStartIndexes.get(rowIndex + 1);
Assert.assertEquals(rows.getInputs().get(expectedInputIndex), searcher.current());
}
}
//previous KV
KeyValue inputPreviousKv = KeyValueUtil.previousKey(kv);
boolean hit = searcher.positionAt(inputPreviousKv);
Assert.assertFalse(hit);
position = searcher.positionAtOrAfter(inputPreviousKv);
if(CollectionUtils.isLastIndex(rows.getInputs(), i)){
Assert.assertTrue(CellScannerPosition.AFTER_LAST == position);
}else{
Assert.assertTrue(CellScannerPosition.AFTER == position);
/*
* TODO: why i+1 instead of i?
*/
Assert.assertEquals(rows.getInputs().get(i+1), searcher.current());
}
}
}
} finally {
DecoderFactory.checkIn(searcher);
}
}
@Test
public void testRandomSeekIndividualAssertions() throws IOException {
CellSearcher searcher = null;
try {
searcher = DecoderFactory.checkOut(block, true);
rows.individualSearcherAssertions(searcher);
} finally {
DecoderFactory.checkIn(searcher);
}
}
@Test
public void testSeekWithPrefix() throws IOException {
if (!(rows instanceof TestRowDataSearchWithPrefix)) {
return;
}
CellSearcher searcher = null;
try {
searcher = DecoderFactory.checkOut(block, true);
// seek with half bytes of second row key, should return second row
KeyValue kv = rows.getInputs().get(1);
KeyValue firstKVOnRow = KeyValueUtil.createFirstOnRow(Arrays.copyOfRange(
kv.getRowArray(), kv.getRowOffset(),
kv.getRowOffset() + kv.getRowLength() / 2));
CellScannerPosition position = searcher.positionAtOrAfter(firstKVOnRow);
Assert.assertEquals(CellScannerPosition.AFTER, position);
Assert.assertEquals(kv, searcher.current());
} finally {
DecoderFactory.checkIn(searcher);
}
}
}

View File

@@ -1,105 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataComplexQualifiers;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataDeeper;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataDifferentTimestamps;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataEmpty;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataExerciseFInts;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNub;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearchWithPrefix;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSimple;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivial;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrls;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/*
* A master class for registering different implementations of TestRowData.
*/
public interface TestRowData {
List<KeyValue> getInputs();
List<Integer> getRowStartIndexes();
void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta);
void individualSearcherAssertions(CellSearcher searcher);
static class InMemory {
/*
* The following are different styles of data that the codec may encounter. Having these small
* representations of the data helps pinpoint what is wrong if the encoder breaks.
*/
public static Collection<TestRowData> getAll() {
List<TestRowData> all = Lists.newArrayList();
//simple
all.add(new TestRowDataEmpty());
all.add(new TestRowDataTrivial());
all.add(new TestRowDataTrivialWithTags());
all.add(new TestRowDataSimple());
all.add(new TestRowDataDeeper());
//more specific
all.add(new TestRowDataSingleQualifier());
// all.add(new TestRowDataMultiFamilies());//multiple families disabled in PrefixTreeEncoder
all.add(new TestRowDataNub());
all.add(new TestRowDataSearcherRowMiss());
all.add(new TestRowDataQualifierByteOrdering());
all.add(new TestRowDataComplexQualifiers());
all.add(new TestRowDataDifferentTimestamps());
//larger data volumes (hard to debug)
all.add(new TestRowDataNumberStrings());
all.add(new TestRowDataUrls());
all.add(new TestRowDataUrlsExample());
all.add(new TestRowDataExerciseFInts());
all.add(new TestRowDataRandomKeyValues());
all.add(new TestRowDataRandomKeyValuesWithTags());
//test data for HBase-12078
all.add(new TestRowDataSearchWithPrefix());
return all;
}
public static Collection<Object[]> getAllAsObjectArray() {
List<Object[]> all = Lists.newArrayList();
for (TestRowData testRows : getAll()) {
all.add(new Object[] { testRows });
}
return all;
}
}
}

View File

@@ -1,194 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestRowEncoder {
protected static int BLOCK_START = 7;
@Parameters
public static Collection<Object[]> parameters() {
return TestRowData.InMemory.getAllAsObjectArray();
}
protected TestRowData rows;
protected List<KeyValue> inputKvs;
protected boolean includeMemstoreTS = true;
protected ByteArrayOutputStream os;
protected PrefixTreeEncoder encoder;
protected int totalBytes;
protected PrefixTreeBlockMeta blockMetaWriter;
protected byte[] outputBytes;
protected ByteBuff buffer;
protected ByteArrayInputStream is;
protected PrefixTreeBlockMeta blockMetaReader;
protected byte[] inputBytes;
protected PrefixTreeArraySearcher searcher;
public TestRowEncoder(TestRowData testRows) {
this.rows = testRows;
}
@Before
public void compile() throws IOException {
// Always run with tags. But should also ensure that KVs without tags work fine
os = new ByteArrayOutputStream(1 << 20);
encoder = new PrefixTreeEncoder(os, includeMemstoreTS);
inputKvs = rows.getInputs();
for (KeyValue kv : inputKvs) {
encoder.write(kv);
}
encoder.flush();
totalBytes = encoder.getTotalBytes();
blockMetaWriter = encoder.getBlockMeta();
outputBytes = os.toByteArray();
// start reading, but save the assertions for @Test methods
ByteBuffer out = ByteBuffer.allocateDirect(outputBytes.length);
ByteBufferUtils.copyFromArrayToBuffer(out, outputBytes, 0, outputBytes.length);
out.position(0);
buffer = new SingleByteBuff(out);
blockMetaReader = new PrefixTreeBlockMeta(buffer);
searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(),
blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength(),
blockMetaReader.getMaxTagsLength());
searcher.initOnBlock(blockMetaReader, buffer, includeMemstoreTS);
}
@Test
public void testEncoderOutput() throws IOException {
Assert.assertEquals(totalBytes, outputBytes.length);
Assert.assertEquals(blockMetaWriter, blockMetaReader);
}
@Test
public void testForwardScanner() {
int counter = -1;
while (searcher.advance()) {
++counter;
KeyValue inputKv = rows.getInputs().get(counter);
KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
assertKeyAndValueEqual(inputKv, outputKv);
}
// assert same number of cells
Assert.assertEquals(rows.getInputs().size(), counter + 1);
}
/**
* probably not needed since testReverseScannerWithJitter() below is more thorough
*/
@Test
public void testReverseScanner() {
searcher.positionAfterLastCell();
int counter = -1;
while (searcher.previous()) {
++counter;
int oppositeIndex = rows.getInputs().size() - counter - 1;
KeyValue inputKv = rows.getInputs().get(oppositeIndex);
KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
assertKeyAndValueEqual(inputKv, outputKv);
}
Assert.assertEquals(rows.getInputs().size(), counter + 1);
}
/**
* Exercise the nubCellsRemain variable by calling next+previous. NubCellsRemain is basically
* a special fan index.
*/
@Test
public void testReverseScannerWithJitter() {
searcher.positionAfterLastCell();
int counter = -1;
while (true) {
boolean foundCell = searcher.previous();
if (!foundCell) {
break;
}
++counter;
// a next+previous should cancel out
if (!searcher.isAfterLast()) {
searcher.advance();
searcher.previous();
}
int oppositeIndex = rows.getInputs().size() - counter - 1;
KeyValue inputKv = rows.getInputs().get(oppositeIndex);
KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
assertKeyAndValueEqual(inputKv, outputKv);
}
Assert.assertEquals(rows.getInputs().size(), counter + 1);
}
@Test
public void testIndividualBlockMetaAssertions() {
rows.individualBlockMetaAssertions(blockMetaReader);
}
/**************** helper **************************/
protected void assertKeyAndValueEqual(Cell expected, Cell actual) {
// assert keys are equal (doesn't compare values)
Assert.assertEquals(expected, actual);
if (includeMemstoreTS) {
Assert.assertEquals(expected.getSequenceId(), actual.getSequenceId());
}
// assert values equal
Assert.assertTrue(Bytes.equals(expected.getValueArray(), expected.getValueOffset(),
expected.getValueLength(), actual.getValueArray(), actual.getValueOffset(),
actual.getValueLength()));
}
}

View File

@@ -1,67 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataComplexQualifiers extends BaseTestRowData{
static byte[]
Arow = Bytes.toBytes("Arow"),
cf = PrefixTreeTestConstants.TEST_CF,
v0 = Bytes.toBytes("v0");
static List<byte[]> qualifiers = Lists.newArrayList();
static {
List<String> qualifierStrings = Lists.newArrayList();
qualifierStrings.add("cq");
qualifierStrings.add("cq0");
qualifierStrings.add("cq1");
qualifierStrings.add("cq2");
qualifierStrings.add("dq0");// second root level fan
qualifierStrings.add("dq1");// nub
qualifierStrings.add("dq111");// leaf on nub
qualifierStrings.add("dq11111a");// leaf on leaf
for (String s : qualifierStrings) {
qualifiers.add(Bytes.toBytes(s));
}
}
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
for (byte[] qualifier : qualifiers) {
d.add(new KeyValue(Arow, cf, qualifier, ts, v0));
}
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,85 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/*
* Goes beyond a trivial trie to add a branch on the "cf" node
*/
public class TestRowDataDeeper extends BaseTestRowData{
static byte[]
cdc = Bytes.toBytes("cdc"),
cf6 = Bytes.toBytes("cf6"),
cfc = Bytes.toBytes("cfc"),
f = Bytes.toBytes("f"),
q = Bytes.toBytes("q"),
v = Bytes.toBytes("v");
static long
ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static{
d.add(new KeyValue(cdc, f, q, ts, v));
d.add(new KeyValue(cf6, f, q, ts, v));
d.add(new KeyValue(cfc, f, q, ts, v));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
@Override
public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
//0: token:c; fan:d,f
//1: token:f; fan:6,c
//2: leaves
Assert.assertEquals(3, blockMeta.getRowTreeDepth());
}
@Override
public void individualSearcherAssertions(CellSearcher searcher) {
/**
* Position at the first possible cell on row "cfc", which sorts before the stored "cfc" cell.
* Assert that positionAtOrAfter() lands AFTER, on the "cfc" cell, and that previous() then steps
* back to the "cf6" row.
*/
KeyValue cfcRow = KeyValueUtil.createFirstOnRow(Bytes.toBytes("cfc"));
CellScannerPosition position = searcher.positionAtOrAfter(cfcRow);
Assert.assertEquals(CellScannerPosition.AFTER, position);
Assert.assertEquals(d.get(2), searcher.current());
searcher.previous();
Assert.assertEquals(d.get(1), searcher.current());
}
}
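
For reference, the row trie this data produces can be rebuilt with the Tokenizer directly, in the style of TestTreeDepth above (hypothetical example class, shown only to illustrate the depth-3 assertion):

import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;

public class DeeperTrieShapeExample {
  public static void main(String[] args) {
    Tokenizer builder = new Tokenizer();
    for (String row : new String[] { "cdc", "cf6", "cfc" }) {
      builder.addSorted(new SimpleMutableByteRange(Bytes.toBytes(row)));
    }
    // depth 0: token "c", fan {d, f}; depth 1: token "f", fan {6, c}; depth 2: leaves
    System.out.println(builder.getTreeDepth());  // 3, matching the rowTreeDepth assertion above
  }
}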

View File

@@ -1,94 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/*
* test different timestamps
*/
public class TestRowDataDifferentTimestamps extends BaseTestRowData{
static byte[]
Arow = Bytes.toBytes("Arow"),
Brow = Bytes.toBytes("Brow"),
cf = Bytes.toBytes("fammy"),
cq0 = Bytes.toBytes("cq0"),
cq1 = Bytes.toBytes("cq1"),
v0 = Bytes.toBytes("v0");
static List<KeyValue> d = Lists.newArrayList();
static{
KeyValue kv0 = new KeyValue(Arow, cf, cq0, 0L, v0);
kv0.setSequenceId(123456789L);
d.add(kv0);
KeyValue kv1 = new KeyValue(Arow, cf, cq1, 1L, v0);
kv1.setSequenceId(3L);
d.add(kv1);
KeyValue kv2 = new KeyValue(Brow, cf, cq0, 12345678L, v0);
kv2.setSequenceId(65537L);
d.add(kv2);
//watch out... Long.MAX_VALUE comes back as 1332221664203, even with other encoders
//d.add(new KeyValue(Brow, cf, cq1, Long.MAX_VALUE, v0));
KeyValue kv3 = new KeyValue(Brow, cf, cq1, Long.MAX_VALUE-1, v0);
kv3.setSequenceId(1L);
d.add(kv3);
KeyValue kv4 = new KeyValue(Brow, cf, cq1, 999999999, v0);
//don't set memstoreTS
d.add(kv4);
KeyValue kv5 = new KeyValue(Brow, cf, cq1, 12345, v0);
kv5.setSequenceId(0L);
d.add(kv5);
}
@Override
public List<KeyValue> getInputs() {
return d;
}
@Override
public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
Assert.assertTrue(blockMeta.getNumMvccVersionBytes() > 0);
Assert.assertEquals(12, blockMeta.getNumValueBytes());
Assert.assertFalse(blockMeta.isAllSameTimestamp());
Assert.assertNotNull(blockMeta.getMinTimestamp());
Assert.assertTrue(blockMeta.getTimestampIndexWidth() > 0);
Assert.assertTrue(blockMeta.getTimestampDeltaWidth() > 0);
Assert.assertFalse(blockMeta.isAllSameMvccVersion());
Assert.assertNotNull(blockMeta.getMinMvccVersion());
Assert.assertTrue(blockMeta.getMvccVersionIndexWidth() > 0);
Assert.assertTrue(blockMeta.getMvccVersionDeltaWidth() > 0);
}
}

View File

@@ -1,43 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataEmpty extends BaseTestRowData{
private static byte[] b = new byte[0];
static List<KeyValue> d = Lists.newArrayList();
static {
d.add(new KeyValue(b, b, b, 0L, Type.Put, b));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,115 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/*
* Exercises wider fixed-width integer (fint) offset fields by using many rows and long qualifiers
*
* http://pastebin.com/7ks8kzJ2
* http://pastebin.com/MPn03nsK
*/
public class TestRowDataExerciseFInts extends BaseTestRowData{
static List<ByteRange> rows;
static{
List<String> rowStrings = new ArrayList<>(16);
rowStrings.add("com.edsBlog/directoryAa/pageAaa");
rowStrings.add("com.edsBlog/directoryAa/pageBbb");
rowStrings.add("com.edsBlog/directoryAa/pageCcc");
rowStrings.add("com.edsBlog/directoryAa/pageDdd");
rowStrings.add("com.edsBlog/directoryBb/pageEee");
rowStrings.add("com.edsBlog/directoryBb/pageFff");
rowStrings.add("com.edsBlog/directoryBb/pageGgg");
rowStrings.add("com.edsBlog/directoryBb/pageHhh");
rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
ByteRangeTreeSet ba = new ByteRangeTreeSet();
for(String row : rowStrings){
ba.add(new SimpleMutableByteRange(Bytes.toBytes(row)));
}
rows = ba.compile().getSortedRanges();
}
static List<String> cols = Lists.newArrayList();
static{
cols.add("Chrome");
cols.add("Chromeb");
cols.add("Firefox");
cols.add("InternetExplorer");
cols.add("Opera");
cols.add("Safari");
cols.add("Z1stBrowserWithHuuuuuuuuuuuugeQualifier");
cols.add("Z2ndBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
cols.add("Z3rdBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
cols.add("Z4thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
cols.add("Z5thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
cols.add("Z6thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
cols.add("Z7thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
cols.add("Z8thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
cols.add("Z9thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
}
static long ts = 1234567890;
static int MAX_VALUE = 50;
static List<KeyValue> kvs = Lists.newArrayList();
static {
for (ByteRange row : rows) {
for (String col : cols) {
KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
kvs.add(kv);
}
}
}
@Override
public List<KeyValue> getInputs() {
return kvs;
}
@Override
public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
Assert.assertTrue(blockMeta.getNextNodeOffsetWidth() > 1);
Assert.assertTrue(blockMeta.getQualifierOffsetWidth() > 1);
}
}

View File

@@ -1,60 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataMultiFamilies extends BaseTestRowData{
static byte[]
rowA = Bytes.toBytes("rowA"),
rowB = Bytes.toBytes("rowB"),
famA = Bytes.toBytes("famA"),
famB = Bytes.toBytes("famB"),
famBB = Bytes.toBytes("famBB"),
q0 = Bytes.toBytes("q0"),
q1 = Bytes.toBytes("q1"),//start with a different character
vvv = Bytes.toBytes("vvv");
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
d.add(new KeyValue(rowA, famA, q0, ts, vvv));
d.add(new KeyValue(rowA, famB, q1, ts, vvv));
d.add(new KeyValue(rowA, famBB, q0, ts, vvv));
d.add(new KeyValue(rowB, famA, q0, ts, vvv));
d.add(new KeyValue(rowB, famA, q1, ts, vvv));
d.add(new KeyValue(rowB, famB, q0, ts, vvv));
d.add(new KeyValue(rowB, famBB, q0, ts, vvv));
d.add(new KeyValue(rowB, famBB, q1, ts, vvv));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,59 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataNub extends BaseTestRowData{
static byte[]
rowA = Bytes.toBytes("rowA"),
rowB = Bytes.toBytes("rowB"),//nub
rowBB = Bytes.toBytes("rowBB"),
cf = PrefixTreeTestConstants.TEST_CF,
cq0 = Bytes.toBytes("cq0"),
cq1 = Bytes.toBytes("cq1"),
v0 = Bytes.toBytes("v0");
static long
ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static{
d.add(new KeyValue(rowA, cf, cq0, ts, v0));
d.add(new KeyValue(rowA, cf, cq1, ts, v0));
d.add(new KeyValue(rowB, cf, cq0, ts, v0));
d.add(new KeyValue(rowB, cf, cq1, ts, v0));
d.add(new KeyValue(rowBB, cf, cq0, ts, v0));
d.add(new KeyValue(rowBB, cf, cq1, ts, v0));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,61 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataNumberStrings extends BaseTestRowData{
static List<KeyValue> d = Lists.newArrayList();
static {
/**
* Test a string-encoded list of numbers. 0, 1, 10, 11 will sort as 0, 1, 10, 11 if strings
* <p/>
* This helped catch a bug with reverse scanning where it was jumping from the last leaf cell to
* the previous nub. It should do 11->10, but it was incorrectly doing 11->1
*/
List<Integer> problematicSeries = Lists.newArrayList(0, 1, 10, 11);//sort this at the end
for(Integer i : problematicSeries){
// for(int i=0; i < 13; ++i){
byte[] row = Bytes.toBytes(""+i);
byte[] family = Bytes.toBytes("F");
byte[] column = Bytes.toBytes("C");
byte[] value = Bytes.toBytes("V");
d.add(new KeyValue(row, family, column, 0L, Type.Put, value));
}
Collections.sort(d, CellComparatorImpl.COMPARATOR);
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,58 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataQualifierByteOrdering extends BaseTestRowData{
static byte[]
Arow = Bytes.toBytes("Arow"),
Brow = Bytes.toBytes("Brow"),
Brow2 = Bytes.toBytes("Brow2"),
fam = Bytes.toBytes("HappyFam"),
cq0 = Bytes.toBytes("cq0"),
cq1 = Bytes.toBytes("cq1tail"),//make sure tail does not come back as liat
cq2 = Bytes.toBytes("cq2"),
v0 = Bytes.toBytes("v0");
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
d.add(new KeyValue(Arow, fam, cq0, ts, v0));
d.add(new KeyValue(Arow, fam, cq1, ts, v0));
d.add(new KeyValue(Brow, fam, cq0, ts, v0));
d.add(new KeyValue(Brow, fam, cq2, ts, v0));
d.add(new KeyValue(Brow2, fam, cq1, ts, v0));
d.add(new KeyValue(Brow2, fam, cq2, ts, v0));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,42 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.RedundantKVGenerator;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataRandomKeyValues extends BaseTestRowData {
static List<KeyValue> d = Lists.newArrayList();
static RedundantKVGenerator generator = new RedundantKVGenerator();
static {
d = generator.generateTestKeyValues(1 << 10);
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,41 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.RedundantKVGenerator;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/**
* Generated KVs with tags
*/
public class TestRowDataRandomKeyValuesWithTags extends BaseTestRowData {
static List<KeyValue> d = Lists.newArrayList();
static RedundantKVGenerator generator = new RedundantKVGenerator();
static {
d = generator.generateTestKeyValues(1 << 10, true);
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,74 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataSearchWithPrefix extends BaseTestRowData {
static byte[] cf = Bytes.toBytes("cf");
static byte[] cq = Bytes.toBytes("cq");
static byte[] v = Bytes.toBytes("v");
static List<KeyValue> d = Lists.newArrayList();
static long ts = 55L;
static byte[] createRowKey(int keyPart1, int keyPart2) {
ByteArrayOutputStream bos = new ByteArrayOutputStream(16);
DataOutputStream dos = new DataOutputStream(bos);
try {
dos.writeInt(keyPart1);
dos.writeInt(keyPart2);
} catch (IOException e) {
// should not happen
throw new RuntimeException(e);
}
return bos.toByteArray();
}
static {
d.add(new KeyValue(createRowKey(1, 12345), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x01000000), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x01010000), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x02000000), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x02020000), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x03000000), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x03030000), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x04000000), cf, cq, ts, v));
d.add(new KeyValue(createRowKey(12345, 0x04040000), cf, cq, ts, v));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,128 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataSearcherRowMiss extends BaseTestRowData{
static byte[]
//rows A, AA, AAA share common prefix bytes; B does not
A = Bytes.toBytes("A"),
AA = Bytes.toBytes("AA"),
AAA = Bytes.toBytes("AAA"),
B = Bytes.toBytes("B"),
cf = Bytes.toBytes("fam"),
cq = Bytes.toBytes("cq0"),
v = Bytes.toBytes("v0");
static long
ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static{
d.add(new KeyValue(A, cf, cq, ts, v));
d.add(new KeyValue(AA, cf, cq, ts, v));
d.add(new KeyValue(AAA, cf, cq, ts, v));
d.add(new KeyValue(B, cf, cq, ts, v));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
@Override
public void individualSearcherAssertions(CellSearcher searcher) {
assertRowOffsetsCorrect();
searcher.resetToBeforeFirstEntry();
//test first cell
try {
searcher.advance();
} catch (IOException e) {
throw new RuntimeException(e);
}
Cell first = searcher.current();
Assert.assertTrue(CellUtil.equals(d.get(0), first));
//test first cell in second row
Assert.assertTrue(searcher.positionAt(d.get(1)));
Assert.assertTrue(CellUtil.equals(d.get(1), searcher.current()));
testBetween1and2(searcher);
testBetween2and3(searcher);
}
/************ private methods, call from above *******************/
private void assertRowOffsetsCorrect(){
Assert.assertEquals(4, getRowStartIndexes().size());
}
private void testBetween1and2(CellSearcher searcher){
CellScannerPosition p;//reuse
Cell betweenAAndAAA = new KeyValue(AA, cf, cq, ts-2, v);
//test exact
Assert.assertFalse(searcher.positionAt(betweenAAndAAA));
//test atOrBefore
p = searcher.positionAtOrBefore(betweenAAndAAA);
Assert.assertEquals(CellScannerPosition.BEFORE, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(1)));
//test atOrAfter
p = searcher.positionAtOrAfter(betweenAAndAAA);
Assert.assertEquals(CellScannerPosition.AFTER, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(2)));
}
private void testBetween2and3(CellSearcher searcher){
CellScannerPosition p;//reuse
Cell betweenAAAndB = new KeyValue(AAA, cf, cq, ts-2, v);
//test exact
Assert.assertFalse(searcher.positionAt(betweenAAAndB));
//test atOrBefore
p = searcher.positionAtOrBefore(betweenAAAndB);
Assert.assertEquals(CellScannerPosition.BEFORE, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(2)));
//test atOrAfter
p = searcher.positionAtOrAfter(betweenAAAndB);
Assert.assertEquals(CellScannerPosition.AFTER, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(3)));
}
}

View File

@@ -1,117 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.junit.Assert;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataSimple extends BaseTestRowData {
static byte[]
// don't let the rows share any common prefix bytes
rowA = Bytes.toBytes("Arow"),
rowB = Bytes.toBytes("Brow"), cf = Bytes.toBytes("fam"),
cq0 = Bytes.toBytes("cq0"),
cq1 = Bytes.toBytes("cq1tail"),// make sure tail does not come back as liat
cq2 = Bytes.toBytes("dcq2"),// start with a different character
v0 = Bytes.toBytes("v0");
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
d.add(new KeyValue(rowA, cf, cq0, ts, v0));
d.add(new KeyValue(rowA, cf, cq1, ts, v0));
d.add(new KeyValue(rowA, cf, cq2, ts, v0));
d.add(new KeyValue(rowB, cf, cq0, ts, v0));
d.add(new KeyValue(rowB, cf, cq1, ts, v0));
d.add(new KeyValue(rowB, cf, cq2, ts, v0));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
@Override
public void individualSearcherAssertions(CellSearcher searcher) {
CellScannerPosition p;// reuse
searcher.resetToBeforeFirstEntry();
// test first cell
try {
searcher.advance();
} catch (IOException e) {
throw new RuntimeException(e);
}
Cell first = searcher.current();
Assert.assertTrue(CellUtil.equals(d.get(0), first));
// test first cell in second row
Assert.assertTrue(searcher.positionAt(d.get(3)));
Assert.assertTrue(CellUtil.equals(d.get(3), searcher.current()));
Cell between4And5 = new KeyValue(rowB, cf, cq1, ts - 2, v0);
// test exact
Assert.assertFalse(searcher.positionAt(between4And5));
// test atOrBefore
p = searcher.positionAtOrBefore(between4And5);
Assert.assertEquals(CellScannerPosition.BEFORE, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(4)));
// test atOrAfter
p = searcher.positionAtOrAfter(between4And5);
Assert.assertEquals(CellScannerPosition.AFTER, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(5)));
// test when key falls before first key in block
Cell beforeFirst = new KeyValue(Bytes.toBytes("A"), cf, cq0, ts, v0);
Assert.assertFalse(searcher.positionAt(beforeFirst));
p = searcher.positionAtOrBefore(beforeFirst);
Assert.assertEquals(CellScannerPosition.BEFORE_FIRST, p);
p = searcher.positionAtOrAfter(beforeFirst);
Assert.assertEquals(CellScannerPosition.AFTER, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(0)));
Assert.assertEquals(d.get(0), searcher.current());
// test when key falls after last key in block
Cell afterLast = new KeyValue(Bytes.toBytes("z"), cf, cq0, ts, v0);// must be lower case z
Assert.assertFalse(searcher.positionAt(afterLast));
p = searcher.positionAtOrAfter(afterLast);
Assert.assertEquals(CellScannerPosition.AFTER_LAST, p);
p = searcher.positionAtOrBefore(afterLast);
Assert.assertEquals(CellScannerPosition.BEFORE, p);
Assert.assertTrue(CellUtil.equals(searcher.current(), CollectionUtils.getLast(d)));
}
}

View File

@@ -1,52 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataSingleQualifier extends BaseTestRowData{
static byte[]
rowA = Bytes.toBytes("rowA"),
rowB = Bytes.toBytes("rowB"),
cf = PrefixTreeTestConstants.TEST_CF,
cq0 = Bytes.toBytes("cq0"),
v0 = Bytes.toBytes("v0");
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
d.add(new KeyValue(rowA, cf, cq0, ts, v0));
d.add(new KeyValue(rowB, cf, cq0, ts, v0));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@@ -1,74 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataTrivial extends BaseTestRowData{
static byte[]
rA = Bytes.toBytes("rA"),
rB = Bytes.toBytes("rB"),//turn "r" into a branch for the Searcher tests
cf = Bytes.toBytes("fam"),
cq0 = Bytes.toBytes("q0"),
v0 = Bytes.toBytes("v0");
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
d.add(new KeyValue(rA, cf, cq0, ts, v0));
d.add(new KeyValue(rB, cf, cq0, ts, v0));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
@Override
public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
// node[0] -> root[r]
// node[1] -> leaf[A], etc
Assert.assertEquals(2, blockMeta.getRowTreeDepth());
}
@Override
public void individualSearcherAssertions(CellSearcher searcher) {
/**
* The searcher should get a token mismatch on the "r" branch. Assert that it skips not only rA,
* but rB as well.
*/
KeyValue afterLast = KeyValueUtil.createFirstOnRow(Bytes.toBytes("zzz"));
CellScannerPosition position = searcher.positionAtOrAfter(afterLast);
Assert.assertEquals(CellScannerPosition.AFTER_LAST, position);
Assert.assertNull(searcher.current());
}
}

View File

@@ -1,81 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public class TestRowDataTrivialWithTags extends BaseTestRowData{
static byte[] rA = Bytes.toBytes("rA"),
    rB = Bytes.toBytes("rB"), // turn "r" into a branch for the Searcher tests
    cf = Bytes.toBytes("fam"), cq0 = Bytes.toBytes("q0"), v0 = Bytes.toBytes("v0");
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
List<Tag> tagList = new ArrayList<>(2);
Tag t = new ArrayBackedTag((byte) 1, "visisbility");
tagList.add(t);
t = new ArrayBackedTag((byte) 2, "ACL");
tagList.add(t);
d.add(new KeyValue(rA, cf, cq0, ts, v0, tagList));
d.add(new KeyValue(rB, cf, cq0, ts, v0, tagList));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
@Override
public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
// node[0] -> root[r]
// node[1] -> leaf[A], etc
Assert.assertEquals(2, blockMeta.getRowTreeDepth());
}
@Override
public void individualSearcherAssertions(CellSearcher searcher) {
/**
* The searcher should get a token mismatch on the "r" branch. Assert that
* it skips not only rA, but rB as well.
*/
KeyValue afterLast = KeyValueUtil.createFirstOnRow(Bytes.toBytes("zzz"));
CellScannerPosition position = searcher.positionAtOrAfter(afterLast);
Assert.assertEquals(CellScannerPosition.AFTER_LAST, position);
Assert.assertNull(searcher.current());
}
}

View File

@@ -1,99 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/*
* Tests URL-like row keys that share long common prefixes
*
* http://pastebin.com/7ks8kzJ2
* http://pastebin.com/MPn03nsK
*/
public class TestRowDataUrls extends BaseTestRowData{
static List<ByteRange> rows;
static{
List<String> rowStrings = new ArrayList<>(16);
rowStrings.add("com.edsBlog/directoryAa/pageAaa");
rowStrings.add("com.edsBlog/directoryAa/pageBbb");
rowStrings.add("com.edsBlog/directoryAa/pageCcc");
rowStrings.add("com.edsBlog/directoryAa/pageDdd");
rowStrings.add("com.edsBlog/directoryBb/pageEee");
rowStrings.add("com.edsBlog/directoryBb/pageFff");
rowStrings.add("com.edsBlog/directoryBb/pageGgg");
rowStrings.add("com.edsBlog/directoryBb/pageHhh");
rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
ByteRangeTreeSet ba = new ByteRangeTreeSet();
for (String row : rowStrings) {
ba.add(new SimpleMutableByteRange(Bytes.toBytes(row)));
}
rows = ba.compile().getSortedRanges();
}
static List<String> cols = Lists.newArrayList();
static {
cols.add("Chrome");
cols.add("Chromeb");
cols.add("Firefox");
cols.add("InternetExplorer");
cols.add("Opera");
cols.add("Safari");
}
static long ts = 1234567890;
static int MAX_VALUE = 50;
static List<KeyValue> kvs = Lists.newArrayList();
static {
for (ByteRange row : rows) {
for (String col : cols) {
KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
kvs.add(kv);
// System.out.println("TestRows5:"+kv);
}
}
}
@Override
public List<KeyValue> getInputs() {
return kvs;
}
}

View File

@@ -1,126 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnNodeWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowNodeWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
/*
* Example dataset of URL-like row keys; main() below prints the encoded structures for docs
*
* http://pastebin.com/7ks8kzJ2
* http://pastebin.com/MPn03nsK
*/
public class TestRowDataUrlsExample extends BaseTestRowData{
static String TENANT_ID = Integer.toString(95322);
static String APP_ID = Integer.toString(12);
static List<String> URLS = Lists.newArrayList(
"com.dablog/2011/10/04/boating",
"com.dablog/2011/10/09/lasers",
"com.jamiesrecipes", //this nub helped find a bug
"com.jamiesrecipes/eggs");
static String FAMILY = "hits";
static List<String> BROWSERS = Lists.newArrayList(
"Chrome", "IE8", "IE9beta");//, "Opera", "Safari");
static long TIMESTAMP = 1234567890;
static int MAX_VALUE = 50;
static List<KeyValue> kvs = Lists.newArrayList();
static{
for(String rowKey : URLS){
for(String qualifier : BROWSERS){
KeyValue kv = new KeyValue(
Bytes.toBytes(rowKey),
Bytes.toBytes(FAMILY),
Bytes.toBytes(qualifier),
TIMESTAMP,
KeyValue.Type.Put,
Bytes.toBytes("VvvV"));
kvs.add(kv);
}
}
}
/**
* Used for generating docs.
*/
public static void main(String... args) throws IOException{
System.out.println("-- inputs --");
System.out.println(KeyValueTestUtil.toStringWithPadding(kvs, true));
ByteArrayOutputStream os = new ByteArrayOutputStream(1<<20);
PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false);
for(KeyValue kv : kvs){
encoder.write(kv);
}
encoder.flush();
System.out.println("-- qualifier SortedPtBuilderNodes --");
for(TokenizerNode tokenizer : encoder.getQualifierWriter().getNonLeaves()){
System.out.println(tokenizer);
}
for(TokenizerNode tokenizerNode : encoder.getQualifierWriter().getLeaves()){
System.out.println(tokenizerNode);
}
System.out.println("-- qualifier PtColumnNodeWriters --");
for(ColumnNodeWriter writer : encoder.getQualifierWriter().getColumnNodeWriters()){
System.out.println(writer);
}
System.out.println("-- rowKey SortedPtBuilderNodes --");
for(TokenizerNode tokenizerNode : encoder.getRowWriter().getNonLeaves()){
System.out.println(tokenizerNode);
}
for(TokenizerNode tokenizerNode : encoder.getRowWriter().getLeaves()){
System.out.println(tokenizerNode);
}
System.out.println("-- row PtRowNodeWriters --");
for(RowNodeWriter writer : encoder.getRowWriter().getNonLeafWriters()){
System.out.println(writer);
}
for(RowNodeWriter writer : encoder.getRowWriter().getLeafWriters()){
System.out.println(writer);
}
System.out.println("-- concatenated values --");
System.out.println(Bytes.toStringBinary(encoder.getValueByteRange().deepCopyToNewArray()));
}
@Override
public List<KeyValue> getInputs() {
return kvs;
}
}

View File

@@ -1,45 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.timestamp;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataBasic;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataNumbers;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataRepeats;
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
public interface TestTimestampData {
List<Long> getInputs();
long getMinimum();
List<Long> getOutputs();
class InMemory {
public Collection<Object[]> getAllAsObjectArray() {
List<Object[]> all = Lists.newArrayList();
all.add(new Object[] { new TestTimestampDataBasic() });
all.add(new Object[] { new TestTimestampDataNumbers() });
all.add(new Object[] { new TestTimestampDataRepeats() });
return all;
}
}
}

View File

@@ -1,98 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.timestamp;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestTimestampEncoder {
@Parameters
public static Collection<Object[]> parameters() {
return new TestTimestampData.InMemory().getAllAsObjectArray();
}
private TestTimestampData timestamps;
private PrefixTreeBlockMeta blockMeta;
private LongEncoder encoder;
private byte[] bytes;
private TimestampDecoder decoder;
public TestTimestampEncoder(TestTimestampData testTimestamps) throws IOException {
this.timestamps = testTimestamps;
this.blockMeta = new PrefixTreeBlockMeta();
this.blockMeta.setNumMetaBytes(0);
this.blockMeta.setNumRowBytes(0);
this.blockMeta.setNumQualifierBytes(0);
this.encoder = new LongEncoder();
for (Long ts : testTimestamps.getInputs()) {
encoder.add(ts);
}
encoder.compile();
blockMeta.setTimestampFields(encoder);
bytes = encoder.getByteArray();
decoder = new TimestampDecoder();
decoder.initOnBlock(blockMeta, new SingleByteBuff(ByteBuffer.wrap(bytes)));
}
@Test
public void testCompressorMinimum() {
Assert.assertEquals(timestamps.getMinimum(), encoder.getMin());
}
@Test
public void testCompressorRoundTrip() {
long[] outputs = encoder.getSortedUniqueTimestamps();
for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
long input = timestamps.getOutputs().get(i);
long output = outputs[i];
Assert.assertEquals(input, output);
}
}
@Test
public void testReaderMinimum() {
Assert.assertEquals(timestamps.getMinimum(), decoder.getLong(0));
}
@Test
public void testReaderRoundTrip() {
for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
long input = timestamps.getOutputs().get(i);
long output = decoder.getLong(i);
Assert.assertEquals(input, output);
}
}
}

View File

@@ -1,54 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData;
public class TestTimestampDataBasic implements TestTimestampData {
@Override
public List<Long> getInputs() {
List<Long> d = new ArrayList<>(5);
d.add(5L);
d.add(3L);
d.add(0L);
d.add(1L);
d.add(3L);
return d;
}
@Override
public long getMinimum() {
return 0L;
}
@Override
public List<Long> getOutputs() {
List<Long> d = new ArrayList<>(4);
d.add(0L);
d.add(1L);
d.add(3L);
d.add(5L);
return d;
}
}

View File

@@ -1,56 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData;
public class TestTimestampDataNumbers implements TestTimestampData {
private int shift = 8;
@Override
public List<Long> getInputs() {
List<Long> d = new ArrayList<>(5);
d.add(5L << shift);
d.add(3L << shift);
d.add(7L << shift);
d.add(1L << shift);
d.add(3L << shift);
return d;
}
@Override
public long getMinimum() {
return 1L << shift;
}
@Override
public List<Long> getOutputs() {
List<Long> d = new ArrayList<>(4);
d.add(1L << shift);
d.add(3L << shift);
d.add(5L << shift);
d.add(7L << shift);
return d;
}
}

View File

@@ -1,52 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData;
public class TestTimestampDataRepeats implements TestTimestampData {
private static long t = 1234567890L;
@Override
public List<Long> getInputs() {
List<Long> d = new ArrayList<>(5);
d.add(t);
d.add(t);
d.add(t);
d.add(t);
d.add(t);
return d;
}
@Override
public long getMinimum() {
return t;
}
@Override
public List<Long> getOutputs() {
List<Long> d = new ArrayList<>();
return d;
}
}

View File

@@ -1,39 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.bytes;
import junit.framework.Assert;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({MiscTests.class, SmallTests.class})
public class TestByteRange {
@Test
public void testConstructor() {
ByteRange b = new SimpleMutableByteRange(new byte[] { 0, 1, 2 });
Assert.assertEquals(3, b.getLength());
}
}

View File

@@ -1,32 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.comparator;
import java.util.Comparator;
import org.apache.hadoop.hbase.util.Bytes;
public class ByteArrayComparator implements Comparator<byte[]> {
@Override
public int compare(byte[] a, byte[] b) {
return Bytes.compareTo(a, b);
}
}

View File

@@ -1,33 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.number;
import java.text.DecimalFormat;
public class NumberFormatter {
public static String addCommas(final Number pValue) {
if (pValue == null) {
return null;
}
String format = "###,###,###,###,###,###,###,###.#####################";
return new DecimalFormat(format).format(pValue);// biggest is 19 digits
}
}

View File

@@ -1,34 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.number;
import java.util.Random;
public class RandomNumberUtils {
public static long nextPositiveLong(Random random) {
while (true) {
long value = random.nextLong();
if (value > 0) {
return value;
}
}
}
}


@ -1,126 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.vint;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/********************** tests *************************/
@Category({MiscTests.class, SmallTests.class})
public class TestFIntTool {
@Test
public void testLeadingZeros() {
Assert.assertEquals(64, Long.numberOfLeadingZeros(0));
Assert.assertEquals(63, Long.numberOfLeadingZeros(1));
Assert.assertEquals(0, Long.numberOfLeadingZeros(Long.MIN_VALUE));
Assert.assertEquals(0, Long.numberOfLeadingZeros(-1));
Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE));
Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE - 1));
}
@Test
public void testMaxValueForNumBytes() {
Assert.assertEquals(255, UFIntTool.maxValueForNumBytes(1));
Assert.assertEquals(65535, UFIntTool.maxValueForNumBytes(2));
Assert.assertEquals(0xffffff, UFIntTool.maxValueForNumBytes(3));
Assert.assertEquals(0xffffffffffffffL, UFIntTool.maxValueForNumBytes(7));
}
@Test
public void testNumBytes() {
Assert.assertEquals(1, UFIntTool.numBytes(0));
Assert.assertEquals(1, UFIntTool.numBytes(1));
Assert.assertEquals(1, UFIntTool.numBytes(255));
Assert.assertEquals(2, UFIntTool.numBytes(256));
Assert.assertEquals(2, UFIntTool.numBytes(65535));
Assert.assertEquals(3, UFIntTool.numBytes(65536));
Assert.assertEquals(4, UFIntTool.numBytes(0xffffffffL));
Assert.assertEquals(5, UFIntTool.numBytes(0x100000000L));
Assert.assertEquals(4, UFIntTool.numBytes(Integer.MAX_VALUE));
Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE));
Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE - 1));
}
@Test
public void testGetBytes() {
Assert.assertArrayEquals(new byte[] { 0 }, UFIntTool.getBytes(1, 0));
Assert.assertArrayEquals(new byte[] { 1 }, UFIntTool.getBytes(1, 1));
Assert.assertArrayEquals(new byte[] { -1 }, UFIntTool.getBytes(1, 255));
Assert.assertArrayEquals(new byte[] { 1, 0 }, UFIntTool.getBytes(2, 256));
Assert.assertArrayEquals(new byte[] { 1, 3 }, UFIntTool.getBytes(2, 256 + 3));
Assert.assertArrayEquals(new byte[] { 1, -128 }, UFIntTool.getBytes(2, 256 + 128));
Assert.assertArrayEquals(new byte[] { 1, -1 }, UFIntTool.getBytes(2, 256 + 255));
Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 },
UFIntTool.getBytes(4, Integer.MAX_VALUE));
Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 },
UFIntTool.getBytes(8, Long.MAX_VALUE));
}
@Test
public void testFromBytes() {
Assert.assertEquals(0, UFIntTool.fromBytes(new byte[] { 0 }));
Assert.assertEquals(1, UFIntTool.fromBytes(new byte[] { 1 }));
Assert.assertEquals(255, UFIntTool.fromBytes(new byte[] { -1 }));
Assert.assertEquals(256, UFIntTool.fromBytes(new byte[] { 1, 0 }));
Assert.assertEquals(256 + 3, UFIntTool.fromBytes(new byte[] { 1, 3 }));
Assert.assertEquals(256 + 128, UFIntTool.fromBytes(new byte[] { 1, -128 }));
Assert.assertEquals(256 + 255, UFIntTool.fromBytes(new byte[] { 1, -1 }));
Assert.assertEquals(Integer.MAX_VALUE, UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1 }));
Assert.assertEquals(Long.MAX_VALUE,
UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }));
}
@Test
public void testRoundTrips() {
long[] values = new long[] { 0, 1, 2, 255, 256, 31123, 65535, 65536, 65537, 0xfffffeL,
0xffffffL, 0x1000000L, 0x1000001L, Integer.MAX_VALUE - 1, Integer.MAX_VALUE,
(long) Integer.MAX_VALUE + 1, Long.MAX_VALUE - 1, Long.MAX_VALUE };
for (int i = 0; i < values.length; ++i) {
Assert.assertEquals(values[i], UFIntTool.fromBytes(UFIntTool.getBytes(8, values[i])));
}
}
@Test
public void testWriteBytes() throws IOException {// copied from testGetBytes
Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(1, 0));
Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1, 1));
Assert.assertArrayEquals(new byte[] { -1 }, bytesViaOutputStream(1, 255));
Assert.assertArrayEquals(new byte[] { 1, 0 }, bytesViaOutputStream(2, 256));
Assert.assertArrayEquals(new byte[] { 1, 3 }, bytesViaOutputStream(2, 256 + 3));
Assert.assertArrayEquals(new byte[] { 1, -128 }, bytesViaOutputStream(2, 256 + 128));
Assert.assertArrayEquals(new byte[] { 1, -1 }, bytesViaOutputStream(2, 256 + 255));
Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 },
bytesViaOutputStream(4, Integer.MAX_VALUE));
Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 },
bytesViaOutputStream(8, Long.MAX_VALUE));
}
private byte[] bytesViaOutputStream(int outputWidth, long value) throws IOException {
ByteArrayOutputStream os = new ByteArrayOutputStream();
UFIntTool.writeBytes(outputWidth, value, os);
return os.toByteArray();
}
}


@ -1,105 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.vint;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Random;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({MiscTests.class, SmallTests.class})
public class TestVIntTool {
@Test
public void testNumBytes() {
Assert.assertEquals(1, UVIntTool.numBytes(0));
Assert.assertEquals(1, UVIntTool.numBytes(1));
Assert.assertEquals(1, UVIntTool.numBytes(100));
Assert.assertEquals(1, UVIntTool.numBytes(126));
Assert.assertEquals(1, UVIntTool.numBytes(127));
Assert.assertEquals(2, UVIntTool.numBytes(128));
Assert.assertEquals(2, UVIntTool.numBytes(129));
Assert.assertEquals(5, UVIntTool.numBytes(Integer.MAX_VALUE));
}
@Test
public void testWriteBytes() throws IOException {
Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(0));
Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1));
Assert.assertArrayEquals(new byte[] { 63 }, bytesViaOutputStream(63));
Assert.assertArrayEquals(new byte[] { 127 }, bytesViaOutputStream(127));
Assert.assertArrayEquals(new byte[] { -128, 1 }, bytesViaOutputStream(128));
Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, bytesViaOutputStream(155));
Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, bytesViaOutputStream(Integer.MAX_VALUE));
}
private byte[] bytesViaOutputStream(int value) throws IOException {
ByteArrayOutputStream os = new ByteArrayOutputStream();
UVIntTool.writeBytes(value, os);
return os.toByteArray();
}
@Test
public void testToBytes() {
Assert.assertArrayEquals(new byte[] { 0 }, UVIntTool.getBytes(0));
Assert.assertArrayEquals(new byte[] { 1 }, UVIntTool.getBytes(1));
Assert.assertArrayEquals(new byte[] { 63 }, UVIntTool.getBytes(63));
Assert.assertArrayEquals(new byte[] { 127 }, UVIntTool.getBytes(127));
Assert.assertArrayEquals(new byte[] { -128, 1 }, UVIntTool.getBytes(128));
Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVIntTool.getBytes(155));
Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, UVIntTool.getBytes(Integer.MAX_VALUE));
}
@Test
public void testFromBytes() {
Assert.assertEquals(Integer.MAX_VALUE,
UVIntTool.getInt(new SingleByteBuff(ByteBuffer.wrap(UVIntTool.MAX_VALUE_BYTES)), 0));
}
@Test
public void testRoundTrips() {
Random random = new Random();
for (int i = 0; i < 10000; ++i) {
int value = random.nextInt(Integer.MAX_VALUE);
byte[] bytes = UVIntTool.getBytes(value);
int roundTripped = UVIntTool.getInt(new SingleByteBuff(ByteBuffer.wrap(bytes)), 0);
Assert.assertEquals(value, roundTripped);
}
}
@Test
public void testInputStreams() throws IOException {
ByteArrayInputStream is;
is = new ByteArrayInputStream(new byte[] { 0 });
Assert.assertEquals(0, UVIntTool.getInt(is));
is = new ByteArrayInputStream(new byte[] { 5 });
Assert.assertEquals(5, UVIntTool.getInt(is));
is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
Assert.assertEquals(155, UVIntTool.getInt(is));
}
}


@ -1,113 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.vint;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Random;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.number.RandomNumberUtils;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({MiscTests.class, SmallTests.class})
public class TestVLongTool {
@Test
public void testNumBytes() {
Assert.assertEquals(1, UVLongTool.numBytes(0));
Assert.assertEquals(1, UVLongTool.numBytes(1));
Assert.assertEquals(1, UVLongTool.numBytes(100));
Assert.assertEquals(1, UVLongTool.numBytes(126));
Assert.assertEquals(1, UVLongTool.numBytes(127));
Assert.assertEquals(2, UVLongTool.numBytes(128));
Assert.assertEquals(2, UVLongTool.numBytes(129));
Assert.assertEquals(9, UVLongTool.numBytes(Long.MAX_VALUE));
}
@Test
public void testToBytes() {
Assert.assertArrayEquals(new byte[] { 0 }, UVLongTool.getBytes(0));
Assert.assertArrayEquals(new byte[] { 1 }, UVLongTool.getBytes(1));
Assert.assertArrayEquals(new byte[] { 63 }, UVLongTool.getBytes(63));
Assert.assertArrayEquals(new byte[] { 127 }, UVLongTool.getBytes(127));
Assert.assertArrayEquals(new byte[] { -128, 1 }, UVLongTool.getBytes(128));
Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVLongTool.getBytes(155));
Assert.assertArrayEquals(UVLongTool.MAX_VALUE_BYTES, UVLongTool.getBytes(Long.MAX_VALUE));
}
@Test
public void testFromBytes() {
Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES));
}
@Test
public void testFromBytesOffset() {
Assert.assertEquals(Long.MAX_VALUE,
UVLongTool.getLong(new SingleByteBuff(ByteBuffer.wrap(UVLongTool.MAX_VALUE_BYTES)), 0));
long ms = 1318966363481L;
// System.out.println(ms);
byte[] bytes = UVLongTool.getBytes(ms);
// System.out.println(Arrays.toString(bytes));
long roundTripped = UVLongTool.getLong(new SingleByteBuff(ByteBuffer.wrap(bytes)), 0);
Assert.assertEquals(ms, roundTripped);
int calculatedNumBytes = UVLongTool.numBytes(ms);
int actualNumBytes = bytes.length;
Assert.assertEquals(actualNumBytes, calculatedNumBytes);
byte[] shiftedBytes = new byte[1000];
int shift = 33;
System.arraycopy(bytes, 0, shiftedBytes, shift, bytes.length);
long shiftedRoundTrip =
UVLongTool.getLong(new SingleByteBuff(ByteBuffer.wrap(shiftedBytes)), shift);
Assert.assertEquals(ms, shiftedRoundTrip);
}
@Test
public void testRoundTrips() {
Random random = new Random();
for (int i = 0; i < 10000; ++i) {
long value = RandomNumberUtils.nextPositiveLong(random);
byte[] bytes = UVLongTool.getBytes(value);
long roundTripped = UVLongTool.getLong(bytes);
Assert.assertEquals(value, roundTripped);
int calculatedNumBytes = UVLongTool.numBytes(value);
int actualNumBytes = bytes.length;
Assert.assertEquals(actualNumBytes, calculatedNumBytes);
}
}
@Test
public void testInputStreams() throws IOException {
ByteArrayInputStream is;
is = new ByteArrayInputStream(new byte[] { 0 });
Assert.assertEquals(0, UVLongTool.getLong(is));
is = new ByteArrayInputStream(new byte[] { 5 });
Assert.assertEquals(5, UVLongTool.getLong(is));
is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
Assert.assertEquals(155, UVLongTool.getLong(is));
}
}


@ -1,68 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Define some default values that can be overridden by system properties
hbase.root.logger=INFO,console
hbase.log.dir=.
hbase.log.file=hbase.log
# Define the root logger to the system property "hbase.root.logger".
log4j.rootLogger=${hbase.root.logger}
# Logging Threshold
log4j.threshold=ALL
#
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file}
# Rollover at midnight
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
# 30-day backup
#log4j.appender.DRFA.MaxBackupIndex=30
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Debugging Pattern format
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
#
# console
# Add "console" to rootlogger above if you want to use this
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n
# Custom Logging levels
#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.apache.zookeeper=ERROR
log4j.logger.org.apache.hadoop.hbase=DEBUG
#These settings are workarounds against spurious logs from the minicluster.
#See HBASE-4709
log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN
log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN
log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN
log4j.logger.org.apache.hadoop.metrics2.util.MBeans=WARN
# Enable this to get detailed connection error/retry logging.
# log4j.logger.org.apache.hadoop.hbase.client.ConnectionImplementation=TRACE


@ -392,11 +392,6 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-replication</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-prefix-tree</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>


@ -792,11 +792,7 @@ public class HFileWriterImpl implements HFile.Writer {
int avgValueLen =
entryCount == 0 ? 0 : (int) (totalValueLength / entryCount);
fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false);
if (hFileContext.getDataBlockEncoding() == DataBlockEncoding.PREFIX_TREE) {
// In case of Prefix Tree encoding, we always write tags information into HFiles even if all
// KVs are having no tags.
fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false);
} else if (hFileContext.isIncludesTags()) {
if (hFileContext.isIncludesTags()) {
// When tags are not being written in this file, MAX_TAGS_LEN is excluded
// from the FileInfo
fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false);


@ -85,7 +85,6 @@ import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
@ -218,7 +217,6 @@ public class TestFromClientSide {
final byte[] T3 = Bytes.toBytes("T3");
HColumnDescriptor hcd = new HColumnDescriptor(FAMILY)
.setKeepDeletedCells(KeepDeletedCells.TRUE)
.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE)
.setMaxVersions(3);
HTableDescriptor desc = new HTableDescriptor(tableName);


@ -42,9 +42,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeSeeker;
import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
@ -194,9 +192,6 @@ public class TestDataBlockEncoders {
List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<>();
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
LOG.info("Encoding: " + encoding);
// Off heap block data support not added for PREFIX_TREE DBE yet.
// TODO remove this once support is added. HBASE-12298
if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
DataBlockEncoder encoder = encoding.getEncoder();
if (encoder == null) {
continue;
@ -271,9 +266,6 @@ public class TestDataBlockEncoders {
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
// Off heap block data support not added for PREFIX_TREE DBE yet.
// TODO remove this once support is added. HBASE-12298
if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
if (encoding.getEncoder() == null) {
continue;
}
@ -317,9 +309,6 @@ public class TestDataBlockEncoders {
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
// Off heap block data support not added for PREFIX_TREE DBE yet.
// TODO remove this once support is added. HBASE-12298
if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
if (encoding.getEncoder() == null) {
continue;
}
@ -346,12 +335,7 @@ public class TestDataBlockEncoders {
Cell actualKeyValue = seeker.getCell();
ByteBuffer actualKey = null;
if (seeker instanceof PrefixTreeSeeker) {
byte[] serializedKey = PrivateCellUtil.getCellKeySerializedAsKeyValueKey(seeker.getKey());
actualKey = ByteBuffer.wrap(KeyValueUtil.createKeyValueFromKey(serializedKey).getKey());
} else {
actualKey = ByteBuffer.wrap(((KeyValue) seeker.getKey()).getKey());
}
actualKey = ByteBuffer.wrap(((KeyValue) seeker.getKey()).getKey());
ByteBuffer actualValue = seeker.getValueShallowCopy();
if (expectedKeyValue != null) {


@ -1,192 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.encoding;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({ IOTests.class, SmallTests.class })
public class TestPrefixTree {
private static final String row4 = "a-b-B-2-1402397300-1402416535";
private static final byte[] row4_bytes = Bytes.toBytes(row4);
private static final String row3 = "a-b-A-1-1402397227-1402415999";
private static final byte[] row3_bytes = Bytes.toBytes(row3);
private static final String row2 = "a-b-A-1-1402329600-1402396277";
private static final byte[] row2_bytes = Bytes.toBytes(row2);
private static final String row1 = "a-b-A-1";
private static final byte[] row1_bytes = Bytes.toBytes(row1);
private final static byte[] fam = Bytes.toBytes("cf_1");
private final static byte[] qual1 = Bytes.toBytes("qf_1");
private final static byte[] qual2 = Bytes.toBytes("qf_2");
private final HBaseTestingUtility testUtil = new HBaseTestingUtility();
private HRegion region;
@Before
public void setUp() throws Exception {
TableName tableName = TableName.valueOf(getClass().getSimpleName());
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(fam).setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE));
HRegionInfo info = new HRegionInfo(tableName, null, null, false);
Path path = testUtil.getDataTestDir(getClass().getSimpleName());
region = HBaseTestingUtility.createRegionAndWAL(info, path, testUtil.getConfiguration(), htd);
}
@After
public void tearDown() throws Exception {
HBaseTestingUtility.closeRegionAndWAL(region);
testUtil.cleanupTestDir();
}
@Test
public void testHBASE11728() throws Exception {
Put put = new Put(Bytes.toBytes("a-b-0-0"));
put.addColumn(fam, qual1, Bytes.toBytes("c1-value"));
region.put(put);
put = new Put(row1_bytes);
put.addColumn(fam, qual1, Bytes.toBytes("c1-value"));
region.put(put);
put = new Put(row2_bytes);
put.addColumn(fam, qual2, Bytes.toBytes("c2-value"));
region.put(put);
put = new Put(row3_bytes);
put.addColumn(fam, qual2, Bytes.toBytes("c2-value-2"));
region.put(put);
put = new Put(row4_bytes);
put.addColumn(fam, qual2, Bytes.toBytes("c2-value-3"));
region.put(put);
region.flush(true);
String[] rows = new String[3];
rows[0] = row1;
rows[1] = row2;
rows[2] = row3;
byte[][] val = new byte[3][];
val[0] = Bytes.toBytes("c1-value");
val[1] = Bytes.toBytes("c2-value");
val[2] = Bytes.toBytes("c2-value-2");
Scan scan = new Scan();
scan.setStartRow(row1_bytes);
scan.setStopRow(Bytes.toBytes("a-b-A-1:"));
RegionScanner scanner = region.getScanner(scan);
List<Cell> cells = new ArrayList<>();
for (int i = 0; i < 3; i++) {
assertEquals(i < 2, scanner.next(cells));
CellScanner cellScanner = Result.create(cells).cellScanner();
while (cellScanner.advance()) {
assertEquals(rows[i], Bytes.toString(cellScanner.current().getRowArray(), cellScanner
.current().getRowOffset(), cellScanner.current().getRowLength()));
assertEquals(Bytes.toString(val[i]), Bytes.toString(cellScanner.current().getValueArray(),
cellScanner.current().getValueOffset(), cellScanner.current().getValueLength()));
}
cells.clear();
}
scanner.close();
// Add column
scan = new Scan();
scan.addColumn(fam, qual2);
scan.setStartRow(row1_bytes);
scan.setStopRow(Bytes.toBytes("a-b-A-1:"));
scanner = region.getScanner(scan);
for (int i = 1; i < 3; i++) {
assertEquals(i < 2, scanner.next(cells));
CellScanner cellScanner = Result.create(cells).cellScanner();
while (cellScanner.advance()) {
assertEquals(rows[i], Bytes.toString(cellScanner.current().getRowArray(), cellScanner
.current().getRowOffset(), cellScanner.current().getRowLength()));
}
cells.clear();
}
scanner.close();
scan = new Scan();
scan.addColumn(fam, qual2);
scan.setStartRow(Bytes.toBytes("a-b-A-1-"));
scan.setStopRow(Bytes.toBytes("a-b-A-1:"));
scanner = region.getScanner(scan);
for (int i = 1; i < 3; i++) {
assertEquals(i < 2, scanner.next(cells));
CellScanner cellScanner = Result.create(cells).cellScanner();
while (cellScanner.advance()) {
assertEquals(rows[i], Bytes.toString(cellScanner.current().getRowArray(), cellScanner
.current().getRowOffset(), cellScanner.current().getRowLength()));
}
cells.clear();
}
scanner.close();
scan = new Scan();
scan.addColumn(fam, qual2);
scan.setStartRow(Bytes.toBytes("a-b-A-1-140239"));
scan.setStopRow(Bytes.toBytes("a-b-A-1:"));
scanner = region.getScanner(scan);
assertFalse(scanner.next(cells));
assertFalse(cells.isEmpty());
scanner.close();
}
@Test
public void testHBASE12817() throws IOException {
for (int i = 0; i < 100; i++) {
region
.put(new Put(Bytes.toBytes("obj" + (2900 + i))).addColumn(fam, qual1, Bytes.toBytes(i)));
}
region.put(new Put(Bytes.toBytes("obj299")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
region.put(new Put(Bytes.toBytes("obj29")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
region.put(new Put(Bytes.toBytes("obj2")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
region.put(new Put(Bytes.toBytes("obj3")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
region.flush(true);
Scan scan = new Scan(Bytes.toBytes("obj29995"));
RegionScanner scanner = region.getScanner(scan);
List<Cell> cells = new ArrayList<>();
assertFalse(scanner.next(cells));
assertArrayEquals(Bytes.toBytes("obj3"), Result.create(cells).getRow());
}
}


@ -1,338 +0,0 @@
/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.encoding;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ConcurrentSkipListSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionBackedScanner;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
* Tests scanning/seeking data with PrefixTree Encoding.
*/
@RunWith(Parameterized.class)
@Category({IOTests.class, SmallTests.class})
public class TestPrefixTreeEncoding {
private static final Log LOG = LogFactory.getLog(TestPrefixTreeEncoding.class);
private static final String CF = "EncodingTestCF";
private static final byte[] CF_BYTES = Bytes.toBytes(CF);
private static final int NUM_ROWS_PER_BATCH = 50;
private static final int NUM_COLS_PER_ROW = 20;
private int numBatchesWritten = 0;
private ConcurrentSkipListSet<Cell> kvset = new ConcurrentSkipListSet<>(CellComparatorImpl.COMPARATOR);
private static boolean formatRowNum = false;
@Parameters
public static Collection<Object[]> parameters() {
List<Object[]> paramList = new ArrayList<>();
{
paramList.add(new Object[] { false });
paramList.add(new Object[] { true });
}
return paramList;
}
private final boolean includesTag;
public TestPrefixTreeEncoding(boolean includesTag) {
this.includesTag = includesTag;
}
@Before
public void setUp() throws Exception {
kvset.clear();
formatRowNum = false;
}
@Test
public void testSeekBeforeWithFixedData() throws Exception {
formatRowNum = true;
PrefixTreeCodec encoder = new PrefixTreeCodec();
int batchId = numBatchesWritten++;
HFileContext meta = new HFileContextBuilder()
.withHBaseCheckSum(false)
.withIncludesMvcc(false)
.withIncludesTags(includesTag)
.withCompression(Algorithm.NONE).build();
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
generateFixedTestData(kvset, batchId, false, includesTag, encoder, blkEncodingCtx,
userDataStream);
EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = baosInMemory.toByteArray();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
seeker.setCurrentBuffer(new SingleByteBuff(readBuffer));
// Seek before the first keyvalue;
Cell seekKey =
PrivateCellUtil.createFirstDeleteFamilyCellOnRow(getRowKey(batchId, 0), CF_BYTES);
seeker.seekToKeyInBlock(seekKey, true);
assertEquals(null, seeker.getCell());
// Seek before the middle keyvalue;
seekKey = PrivateCellUtil
.createFirstDeleteFamilyCellOnRow(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), CF_BYTES);
seeker.seekToKeyInBlock(seekKey, true);
assertNotNull(seeker.getCell());
assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1),
CellUtil.cloneRow(seeker.getCell()));
// Seek before the last keyvalue;
seekKey = PrivateCellUtil.createFirstDeleteFamilyCellOnRow(Bytes.toBytes("zzzz"), CF_BYTES);
seeker.seekToKeyInBlock(seekKey, true);
assertNotNull(seeker.getCell());
assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1),
CellUtil.cloneRow(seeker.getCell()));
}
@Test
public void testScanWithRandomData() throws Exception {
PrefixTreeCodec encoder = new PrefixTreeCodec();
ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
HFileContext meta = new HFileContextBuilder()
.withHBaseCheckSum(false)
.withIncludesMvcc(false)
.withIncludesTags(includesTag)
.withCompression(Algorithm.NONE)
.build();
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
generateRandomTestData(kvset, numBatchesWritten++, includesTag, encoder, blkEncodingCtx,
userDataStream);
EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = baosInMemory.toByteArray();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
seeker.setCurrentBuffer(new SingleByteBuff(readBuffer));
Cell previousKV = null;
do {
Cell currentKV = seeker.getCell();
System.out.println(currentKV);
if (previousKV != null && CellComparatorImpl.COMPARATOR.compare(currentKV, previousKV) < 0) {
dumpInputKVSet();
fail("Current kv " + currentKV + " is smaller than previous keyvalue " + previousKV);
}
if (!includesTag) {
assertFalse(currentKV.getTagsLength() > 0);
} else {
Assert.assertTrue(currentKV.getTagsLength() > 0);
}
previousKV = currentKV;
} while (seeker.next());
}
@Test
public void testSeekWithRandomData() throws Exception {
PrefixTreeCodec encoder = new PrefixTreeCodec();
ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
int batchId = numBatchesWritten++;
HFileContext meta = new HFileContextBuilder()
.withHBaseCheckSum(false)
.withIncludesMvcc(false)
.withIncludesTags(includesTag)
.withCompression(Algorithm.NONE)
.build();
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
generateRandomTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream);
EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = baosInMemory.toByteArray();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
verifySeeking(seeker, readBuffer, batchId);
}
@Test
public void testSeekWithFixedData() throws Exception {
PrefixTreeCodec encoder = new PrefixTreeCodec();
int batchId = numBatchesWritten++;
HFileContext meta = new HFileContextBuilder()
.withHBaseCheckSum(false)
.withIncludesMvcc(false)
.withIncludesTags(includesTag)
.withCompression(Algorithm.NONE)
.build();
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
generateFixedTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream);
EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = baosInMemory.toByteArray();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
verifySeeking(seeker, readBuffer, batchId);
}
private void verifySeeking(EncodedSeeker encodeSeeker,
ByteBuffer encodedData, int batchId) {
List<KeyValue> kvList = new ArrayList<>();
for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
kvList.clear();
encodeSeeker.setCurrentBuffer(new SingleByteBuff(encodedData));
KeyValue firstOnRow = KeyValueUtil.createFirstOnRow(getRowKey(batchId, i));
encodeSeeker.seekToKeyInBlock(
new KeyValue.KeyOnlyKeyValue(firstOnRow.getBuffer(), firstOnRow.getKeyOffset(),
firstOnRow.getKeyLength()), false);
boolean hasMoreOfEncodeScanner = encodeSeeker.next();
CollectionBackedScanner collectionScanner = new CollectionBackedScanner(
this.kvset);
boolean hasMoreOfCollectionScanner = collectionScanner.seek(firstOnRow);
if (hasMoreOfEncodeScanner != hasMoreOfCollectionScanner) {
dumpInputKVSet();
fail("Get error result after seeking " + firstOnRow);
}
if (hasMoreOfEncodeScanner) {
if (CellComparatorImpl.COMPARATOR.compare(encodeSeeker.getCell(),
collectionScanner.peek()) != 0) {
dumpInputKVSet();
fail("Expected " + collectionScanner.peek() + " actual "
+ encodeSeeker.getCell() + ", after seeking " + firstOnRow);
}
}
}
}
private void dumpInputKVSet() {
LOG.info("Dumping input keyvalue set in error case:");
for (Cell kv : kvset) {
System.out.println(kv);
}
}
private static void generateFixedTestData(ConcurrentSkipListSet<Cell> kvset, int batchId,
boolean useTags, PrefixTreeCodec encoder, HFileBlockEncodingContext blkEncodingCtx,
DataOutputStream userDataStream) throws Exception {
generateFixedTestData(kvset, batchId, true, useTags, encoder, blkEncodingCtx, userDataStream);
}
private static void generateFixedTestData(ConcurrentSkipListSet<Cell> kvset,
int batchId, boolean partial, boolean useTags, PrefixTreeCodec encoder,
HFileBlockEncodingContext blkEncodingCtx, DataOutputStream userDataStream) throws Exception {
for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
if (partial && i / 10 % 2 == 1)
continue;
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
if (!useTags) {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue(
batchId, i, j));
kvset.add(kv);
} else {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l,
getValue(batchId, i, j), new Tag[] { new ArrayBackedTag((byte) 1, "metaValue1") });
kvset.add(kv);
}
}
}
encoder.startBlockEncoding(blkEncodingCtx, userDataStream);
for (Cell kv : kvset) {
encoder.encode(kv, blkEncodingCtx, userDataStream);
}
encoder.endBlockEncoding(blkEncodingCtx, userDataStream, null);
}
private static void generateRandomTestData(ConcurrentSkipListSet<Cell> kvset,
int batchId, boolean useTags, PrefixTreeCodec encoder,
HFileBlockEncodingContext blkEncodingCtx, DataOutputStream userDataStream) throws Exception {
Random random = new Random();
for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
if (random.nextInt(100) < 50)
continue;
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
if (random.nextInt(100) < 50)
continue;
if (!useTags) {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue(
batchId, i, j));
kvset.add(kv);
} else {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l,
getValue(batchId, i, j), new Tag[] { new ArrayBackedTag((byte) 1, "metaValue1") });
kvset.add(kv);
}
}
}
encoder.startBlockEncoding(blkEncodingCtx, userDataStream);
for (Cell kv : kvset) {
encoder.encode(kv, blkEncodingCtx, userDataStream);
}
encoder.endBlockEncoding(blkEncodingCtx, userDataStream, null);
}
private static byte[] getRowKey(int batchId, int i) {
return Bytes
.toBytes("batch" + batchId + "_row" + (formatRowNum ? String.format("%04d", i) : i));
}
private static byte[] getQualifier(int j) {
return Bytes.toBytes("colfdfafhfhsdfhsdfh" + j);
}
private static byte[] getValue(int batchId, int i, int j) {
return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) + "_col" + j);
}
}


@ -272,10 +272,9 @@ public class TestSeekToBlockWithEncoders {
// create all seekers
List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<>();
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
if (encoding.getEncoder() == null || encoding == DataBlockEncoding.PREFIX_TREE) {
if (encoding.getEncoder() == null) {
continue;
}
DataBlockEncoder encoder = encoding.getEncoder();
HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(false)
.withIncludesMvcc(false).withIncludesTags(false)


@ -149,10 +149,6 @@ public class TestHFileDataBlockEncoder {
@Test
public void testEncodingWithOffheapKeyValue() throws IOException {
// usually we have just block without headers, but don't complicate that
if(blockEncoder.getDataBlockEncoding() == DataBlockEncoding.PREFIX_TREE) {
// This is a TODO: Only after PrefixTree is fixed we can remove this check
return;
}
try {
List<Cell> kvs = generator.generateTestExtendedOffheapKeyValues(60, true);
HFileContext meta = new HFileContextBuilder().withIncludesMvcc(includesMemstoreTS)


@ -316,12 +316,7 @@ public class TestSeekTo {
assertEquals("i", toRowStr(scanner.getCell()));
assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
if (encoding == DataBlockEncoding.PREFIX_TREE) {
// TODO : Fix this
assertEquals(null, scanner.getCell());
} else {
assertEquals("k", toRowStr(scanner.getCell()));
}
assertEquals("k", toRowStr(scanner.getCell()));
reader.close();
deleteTestDir(fs);


@ -116,8 +116,7 @@ public class TestTags {
HTableDescriptor desc = new HTableDescriptor(tableName);
HColumnDescriptor colDesc = new HColumnDescriptor(fam);
colDesc.setBlockCacheEnabled(true);
// colDesc.setDataBlockEncoding(DataBlockEncoding.NONE);
colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
colDesc.setDataBlockEncoding(DataBlockEncoding.NONE);
desc.addFamily(colDesc);
Admin admin = TEST_UTIL.getAdmin();
admin.createTable(desc);
@ -183,7 +182,7 @@ public class TestTags {
HColumnDescriptor colDesc = new HColumnDescriptor(fam);
colDesc.setBlockCacheEnabled(true);
// colDesc.setDataBlockEncoding(DataBlockEncoding.NONE);
colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
// colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
desc.addFamily(colDesc);
Admin admin = TEST_UTIL.getAdmin();
admin.createTable(desc);

Some files were not shown because too many files have changed in this diff.