HBASE-19179 Remove hbase-prefix-tree

This commit is contained in:
parent cdff80d976
commit f8c58930aa

@@ -39,7 +39,7 @@ public enum DataBlockEncoding {
   FAST_DIFF(4, "org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder"),
   // id 5 is reserved for the COPY_KEY algorithm for benchmarking
   // COPY_KEY(5, "org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder"),
-  PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec"),
+  // PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec"),
   ROW_INDEX_V1(7, "org.apache.hadoop.hbase.io.encoding.RowIndexCodecV1");

   private final short id;
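DataBlockEncoding persists only the short id in each encoded HFile block and resolves the codec through the class-name string, so a retired encoder's id must stay reserved forever; the commented-out entry above documents that for id 6, just as id 5 is kept for COPY_KEY. A hedged sketch of that reflective id-to-codec lookup (illustrative registry and names, not the enum's actual wiring):

// Sketch of a reflective codec lookup keyed by a persisted id, similar in
// spirit to DataBlockEncoding. All names here are illustrative.
import java.util.HashMap;
import java.util.Map;

final class CodecRegistry {
  private static final Map<Short, String> CLASS_BY_ID = new HashMap<>();
  static {
    CLASS_BY_ID.put((short) 4, "org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder");
    // ids 5 and 6 stay reserved: blocks on disk may still carry them.
    CLASS_BY_ID.put((short) 7, "org.apache.hadoop.hbase.io.encoding.RowIndexCodecV1");
  }

  static Object createEncoder(short id) {
    String className = CLASS_BY_ID.get(id);
    if (className == null) {
      // A reserved or retired id must fail loudly, not decode garbage.
      throw new IllegalArgumentException("No codec registered for encoding id " + id);
    }
    try {
      return Class.forName(className).getDeclaredConstructor().newInstance();
    } catch (ReflectiveOperationException e) {
      throw new RuntimeException("Cannot instantiate codec " + className, e);
    }
  }
}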
@@ -179,10 +179,6 @@
     <artifactId>slf4j-api</artifactId>
     <version>${slf4j.version}</version>
   </dependency>
   <dependency>
-    <groupId>org.apache.hbase</groupId>
-    <artifactId>hbase-prefix-tree</artifactId>
-  </dependency>
-  <dependency>
     <groupId>org.apache.htrace</groupId>
     <artifactId>htrace-core</artifactId>
@@ -801,21 +801,6 @@ public class TableMapReduceUtil {
    * @see <a href="https://issues.apache.org/jira/browse/PIG-3285">PIG-3285</a>
    */
   public static void addHBaseDependencyJars(Configuration conf) throws IOException {
-
-    // PrefixTreeCodec is part of the hbase-prefix-tree module. If it is not included in the MR
-    // job's jar dependencies, MR jobs that write encoded hfiles will fail.
-    // We use reflection here to prevent a circular module dependency.
-    // TODO - if we extract the MR into a module, make it depend on hbase-prefix-tree.
-    Class prefixTreeCodecClass = null;
-    try {
-      prefixTreeCodecClass =
-          Class.forName("org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");
-    } catch (ClassNotFoundException e) {
-      // this will show up in unit tests but should not show up in real deployments
-      LOG.warn("The hbase-prefix-tree module jar containing PrefixTreeCodec is not present." +
-          " Continuing without it.");
-    }
-
     addDependencyJarsForClasses(conf,
         // explicitly pull a class from each module
         org.apache.hadoop.hbase.HConstants.class, // hbase-common
@@ -828,8 +813,6 @@ public class TableMapReduceUtil {
         org.apache.hadoop.hbase.mapreduce.TableMapper.class, // hbase-mapreduce
         org.apache.hadoop.hbase.metrics.impl.FastLongHistogram.class, // hbase-metrics
         org.apache.hadoop.hbase.metrics.Snapshot.class, // hbase-metrics-api
-        prefixTreeCodecClass, // hbase-prefix-tree (if null will be skipped)
         // pull necessary dependencies
         org.apache.zookeeper.ZooKeeper.class,
         org.apache.hadoop.hbase.shaded.io.netty.channel.Channel.class,
         com.google.protobuf.Message.class,
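The removed block is a textbook optional-dependency idiom that this commit retires: resolve a class reflectively so the module is never a compile-time dependency, then let the jar-collection helper skip the null that marks an absent module. A minimal standalone sketch of that idiom (the helper name mirrors addDependencyJarsForClasses, but this version is illustrative):

import java.util.ArrayList;
import java.util.List;

final class OptionalModuleJars {
  /** Returns the class if the optional module is on the classpath, else null. */
  static Class<?> loadIfPresent(String className) {
    try {
      return Class.forName(className);
    } catch (ClassNotFoundException e) {
      return null; // optional module absent; caller tolerates null
    }
  }

  /** Collects one representative class per module, skipping absent (null) ones. */
  static List<Class<?>> classesForJobJars(Class<?>... candidates) {
    List<Class<?>> present = new ArrayList<>();
    for (Class<?> clazz : candidates) {
      if (clazz != null) { // null marks an optional module that was not found
        present.add(clazz);
      }
    }
    return present;
  }
}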
--- a/hbase-prefix-tree/pom.xml
+++ /dev/null
@@ -1,189 +0,0 @@
<?xml version="1.0"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!--
  /**
   * Licensed to the Apache Software Foundation (ASF) under one
   * or more contributor license agreements. See the NOTICE file
   * distributed with this work for additional information
   * regarding copyright ownership. The ASF licenses this file
   * to you under the Apache License, Version 2.0 (the
   * "License"); you may not use this file except in compliance
   * with the License. You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  -->
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <artifactId>hbase-build-configuration</artifactId>
    <groupId>org.apache.hbase</groupId>
    <version>2.0.0-alpha4-SNAPSHOT</version>
    <relativePath>../hbase-build-configuration</relativePath>
  </parent>

  <artifactId>hbase-prefix-tree</artifactId>
  <name>Apache HBase - Prefix Tree</name>
  <description>Prefix Tree Data Block Encoder</description>
  <!--REMOVE-->

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-site-plugin</artifactId>
        <configuration>
          <skip>true</skip>
        </configuration>
      </plugin>
      <!-- Make a jar and put the sources in the jar -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
      </plugin>
      <plugin>
        <!--Make it so assembly:single does nothing in here-->
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <skipAssembly>true</skipAssembly>
        </configuration>
      </plugin>
    </plugins>
    <pluginManagement>
      <plugins>
        <!--This plugin's configuration is used to store Eclipse m2e settings
          only. It has no influence on the Maven build itself.-->
        <plugin>
          <groupId>org.eclipse.m2e</groupId>
          <artifactId>lifecycle-mapping</artifactId>
          <configuration>
            <lifecycleMappingMetadata>
              <pluginExecutions>
                <pluginExecution>
                  <pluginExecutionFilter>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <versionRange>[3.2,)</versionRange>
                    <goals>
                      <goal>compile</goal>
                    </goals>
                  </pluginExecutionFilter>
                  <action>
                    <ignore></ignore>
                  </action>
                </pluginExecution>
              </pluginExecutions>
            </lifecycleMappingMetadata>
          </configuration>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>

  <dependencies>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>${project.version}</version>
      <classifier>tests</classifier>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-annotations</artifactId>
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase.thirdparty</groupId>
      <artifactId>hbase-shaded-miscellaneous</artifactId>
    </dependency>
    <dependency>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <profiles>
    <!-- Skip the tests in this module -->
    <profile>
      <id>skipPrefixTreeTests</id>
      <activation>
        <property>
          <name>skipPrefixTreeTests</name>
        </property>
      </activation>
      <properties>
        <surefire.skipFirstPart>true</surefire.skipFirstPart>
        <surefire.skipSecondPart>true</surefire.skipSecondPart>
      </properties>
    </profile>
    <!-- Profiles for building against different hadoop versions -->
    <!--
      profile for building against Hadoop 2.0.0-alpha. Activate using:
        mvn -Dhadoop.profile=2.0
    -->
    <profile>
      <id>hadoop-2.0</id>
      <activation>
        <property>
          <!--Below formatting for dev-support/generate-hadoopX-poms.sh-->
          <!--h2--><name>!hadoop.profile</name>
        </property>
      </activation>
      <dependencies>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
          <exclusions>
            <exclusion>
              <groupId>com.google.guava</groupId>
              <artifactId>guava</artifactId>
            </exclusion>
          </exclusions>
        </dependency>
      </dependencies>
    </profile>
    <!--
      profile for building against Hadoop 3.0.x. Activate using:
        mvn -Dhadoop.profile=3.0
    -->
    <profile>
      <id>hadoop-3.0</id>
      <activation>
        <property>
          <name>hadoop.profile</name>
          <value>3.0</value>
        </property>
      </activation>
      <properties>
        <hadoop.version>3.0-SNAPSHOT</hadoop.version>
      </properties>
      <dependencies>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
          <exclusions>
            <exclusion>
              <groupId>com.google.guava</groupId>
              <artifactId>guava</artifactId>
            </exclusion>
          </exclusions>
        </dependency>
      </dependencies>
    </profile>
  </profiles>
</project>
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeBlockMeta.java
+++ /dev/null
@@ -1,899 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
import org.apache.hadoop.hbase.util.vint.UVLongTool;

/**
 * Information about the block. Stored at the beginning of the byte[]. Contains things
 * like the minimum timestamp and the width of FInts in the row tree.
 *
 * Most fields are stored as VInts that get decoded on the first access of each new block.
 */
@InterfaceAudience.Private
public class PrefixTreeBlockMeta {

  /******************* static fields ********************/

  public static final int VERSION = 0;

  public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE; // hard-coded in KeyValue

  public static final int
    NUM_LONGS = 2,
    NUM_INTS = 28,
    NUM_SHORTS = 0, // keyValueTypeWidth not persisted
    NUM_SINGLE_BYTES = 2,
    MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS
        + Bytes.SIZEOF_SHORT * NUM_SHORTS
        + Bytes.SIZEOF_INT * NUM_INTS
        + NUM_SINGLE_BYTES;

  /**************** transient fields *********************/
  protected int bufferOffset;

  /**************** persisted fields **********************/

  // PrefixTree version to allow future format modifications
  protected int version;
  protected int numMetaBytes;
  protected int numKeyValueBytes;
  protected boolean includesMvccVersion; // probably don't need this explicitly, but only 1 byte

  // split the byte[] into 6 sections for the different data types
  protected int numRowBytes;
  protected int numFamilyBytes;
  protected int numQualifierBytes;
  protected int numTimestampBytes;
  protected int numMvccVersionBytes;
  protected int numValueBytes;
  protected int numTagsBytes;

  // number of bytes in each section of fixed width FInts
  protected int nextNodeOffsetWidth;
  protected int familyOffsetWidth;
  protected int qualifierOffsetWidth;
  protected int timestampIndexWidth;
  protected int mvccVersionIndexWidth;
  protected int valueOffsetWidth;
  protected int valueLengthWidth;
  protected int tagsOffsetWidth;

  // used to pre-allocate structures for reading
  protected int rowTreeDepth;
  protected int maxRowLength;
  protected int maxQualifierLength;
  protected int maxTagsLength;

  // the timestamp from which the deltas are calculated
  protected long minTimestamp;
  protected int timestampDeltaWidth;
  protected long minMvccVersion;
  protected int mvccVersionDeltaWidth;

  protected boolean allSameType;
  protected byte allTypes;

  protected int numUniqueRows;
  protected int numUniqueFamilies;
  protected int numUniqueQualifiers;
  protected int numUniqueTags;

  /***************** constructors ********************/

  public PrefixTreeBlockMeta() {
  }

  public PrefixTreeBlockMeta(InputStream is) throws IOException {
    this.version = VERSION;
    this.bufferOffset = 0;
    readVariableBytesFromInputStream(is);
  }

  /**
   * @param buffer positioned at start of PtBlockMeta
   */
  public PrefixTreeBlockMeta(ByteBuff buffer) {
    initOnBlock(buffer);
  }

  public void initOnBlock(ByteBuff buffer) {
    bufferOffset = buffer.position();
    readVariableBytesFromBuffer(buffer, bufferOffset);
  }

  /**************** operate on each field **********************/

  public int calculateNumMetaBytes() {
    int numBytes = 0;
    numBytes += UVIntTool.numBytes(version);
    numBytes += UVLongTool.numBytes(numMetaBytes);
    numBytes += UVIntTool.numBytes(numKeyValueBytes);
    ++numBytes; // os.write(getIncludesMvccVersion());

    numBytes += UVIntTool.numBytes(numRowBytes);
    numBytes += UVIntTool.numBytes(numFamilyBytes);
    numBytes += UVIntTool.numBytes(numQualifierBytes);
    numBytes += UVIntTool.numBytes(numTagsBytes);
    numBytes += UVIntTool.numBytes(numTimestampBytes);
    numBytes += UVIntTool.numBytes(numMvccVersionBytes);
    numBytes += UVIntTool.numBytes(numValueBytes);

    numBytes += UVIntTool.numBytes(nextNodeOffsetWidth);
    numBytes += UVIntTool.numBytes(familyOffsetWidth);
    numBytes += UVIntTool.numBytes(qualifierOffsetWidth);
    numBytes += UVIntTool.numBytes(tagsOffsetWidth);
    numBytes += UVIntTool.numBytes(timestampIndexWidth);
    numBytes += UVIntTool.numBytes(mvccVersionIndexWidth);
    numBytes += UVIntTool.numBytes(valueOffsetWidth);
    numBytes += UVIntTool.numBytes(valueLengthWidth);

    numBytes += UVIntTool.numBytes(rowTreeDepth);
    numBytes += UVIntTool.numBytes(maxRowLength);
    numBytes += UVIntTool.numBytes(maxQualifierLength);
    numBytes += UVIntTool.numBytes(maxTagsLength);

    numBytes += UVLongTool.numBytes(minTimestamp);
    numBytes += UVIntTool.numBytes(timestampDeltaWidth);
    numBytes += UVLongTool.numBytes(minMvccVersion);
    numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth);
    ++numBytes; // os.write(getAllSameTypeByte());
    ++numBytes; // os.write(allTypes);

    numBytes += UVIntTool.numBytes(numUniqueRows);
    numBytes += UVIntTool.numBytes(numUniqueFamilies);
    numBytes += UVIntTool.numBytes(numUniqueQualifiers);
    numBytes += UVIntTool.numBytes(numUniqueTags);
    return numBytes;
  }

  public void writeVariableBytesToOutputStream(OutputStream os) throws IOException {
    UVIntTool.writeBytes(version, os);
    UVIntTool.writeBytes(numMetaBytes, os);
    UVIntTool.writeBytes(numKeyValueBytes, os);
    os.write(getIncludesMvccVersionByte());

    UVIntTool.writeBytes(numRowBytes, os);
    UVIntTool.writeBytes(numFamilyBytes, os);
    UVIntTool.writeBytes(numQualifierBytes, os);
    UVIntTool.writeBytes(numTagsBytes, os);
    UVIntTool.writeBytes(numTimestampBytes, os);
    UVIntTool.writeBytes(numMvccVersionBytes, os);
    UVIntTool.writeBytes(numValueBytes, os);

    UVIntTool.writeBytes(nextNodeOffsetWidth, os);
    UVIntTool.writeBytes(familyOffsetWidth, os);
    UVIntTool.writeBytes(qualifierOffsetWidth, os);
    UVIntTool.writeBytes(tagsOffsetWidth, os);
    UVIntTool.writeBytes(timestampIndexWidth, os);
    UVIntTool.writeBytes(mvccVersionIndexWidth, os);
    UVIntTool.writeBytes(valueOffsetWidth, os);
    UVIntTool.writeBytes(valueLengthWidth, os);

    UVIntTool.writeBytes(rowTreeDepth, os);
    UVIntTool.writeBytes(maxRowLength, os);
    UVIntTool.writeBytes(maxQualifierLength, os);
    UVIntTool.writeBytes(maxTagsLength, os);

    UVLongTool.writeBytes(minTimestamp, os);
    UVIntTool.writeBytes(timestampDeltaWidth, os);
    UVLongTool.writeBytes(minMvccVersion, os);
    UVIntTool.writeBytes(mvccVersionDeltaWidth, os);
    os.write(getAllSameTypeByte());
    os.write(allTypes);

    UVIntTool.writeBytes(numUniqueRows, os);
    UVIntTool.writeBytes(numUniqueFamilies, os);
    UVIntTool.writeBytes(numUniqueQualifiers, os);
    UVIntTool.writeBytes(numUniqueTags, os);
  }

  public void readVariableBytesFromInputStream(InputStream is) throws IOException {
    version = UVIntTool.getInt(is);
    numMetaBytes = UVIntTool.getInt(is);
    numKeyValueBytes = UVIntTool.getInt(is);
    setIncludesMvccVersion((byte) is.read());

    numRowBytes = UVIntTool.getInt(is);
    numFamilyBytes = UVIntTool.getInt(is);
    numQualifierBytes = UVIntTool.getInt(is);
    numTagsBytes = UVIntTool.getInt(is);
    numTimestampBytes = UVIntTool.getInt(is);
    numMvccVersionBytes = UVIntTool.getInt(is);
    numValueBytes = UVIntTool.getInt(is);

    nextNodeOffsetWidth = UVIntTool.getInt(is);
    familyOffsetWidth = UVIntTool.getInt(is);
    qualifierOffsetWidth = UVIntTool.getInt(is);
    tagsOffsetWidth = UVIntTool.getInt(is);
    timestampIndexWidth = UVIntTool.getInt(is);
    mvccVersionIndexWidth = UVIntTool.getInt(is);
    valueOffsetWidth = UVIntTool.getInt(is);
    valueLengthWidth = UVIntTool.getInt(is);

    rowTreeDepth = UVIntTool.getInt(is);
    maxRowLength = UVIntTool.getInt(is);
    maxQualifierLength = UVIntTool.getInt(is);
    maxTagsLength = UVIntTool.getInt(is);

    minTimestamp = UVLongTool.getLong(is);
    timestampDeltaWidth = UVIntTool.getInt(is);
    minMvccVersion = UVLongTool.getLong(is);
    mvccVersionDeltaWidth = UVIntTool.getInt(is);

    setAllSameType((byte) is.read());
    allTypes = (byte) is.read();

    numUniqueRows = UVIntTool.getInt(is);
    numUniqueFamilies = UVIntTool.getInt(is);
    numUniqueQualifiers = UVIntTool.getInt(is);
    numUniqueTags = UVIntTool.getInt(is);
  }

  public void readVariableBytesFromBuffer(ByteBuff buf, int offset) {
    int position = offset;

    version = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(version);
    numMetaBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numMetaBytes);
    numKeyValueBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numKeyValueBytes);
    setIncludesMvccVersion(buf.get(position));
    ++position;

    numRowBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numRowBytes);
    numFamilyBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numFamilyBytes);
    numQualifierBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numQualifierBytes);
    numTagsBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numTagsBytes);
    numTimestampBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numTimestampBytes);
    numMvccVersionBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numMvccVersionBytes);
    numValueBytes = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numValueBytes);

    nextNodeOffsetWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(nextNodeOffsetWidth);
    familyOffsetWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(familyOffsetWidth);
    qualifierOffsetWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(qualifierOffsetWidth);
    tagsOffsetWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(tagsOffsetWidth);
    timestampIndexWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(timestampIndexWidth);
    mvccVersionIndexWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(mvccVersionIndexWidth);
    valueOffsetWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(valueOffsetWidth);
    valueLengthWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(valueLengthWidth);

    rowTreeDepth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(rowTreeDepth);
    maxRowLength = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(maxRowLength);
    maxQualifierLength = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(maxQualifierLength);
    maxTagsLength = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(maxTagsLength);
    minTimestamp = UVLongTool.getLong(buf, position);
    position += UVLongTool.numBytes(minTimestamp);
    timestampDeltaWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(timestampDeltaWidth);
    minMvccVersion = UVLongTool.getLong(buf, position);
    position += UVLongTool.numBytes(minMvccVersion);
    mvccVersionDeltaWidth = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(mvccVersionDeltaWidth);

    setAllSameType(buf.get(position));
    ++position;
    allTypes = buf.get(position);
    ++position;

    numUniqueRows = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numUniqueRows);
    numUniqueFamilies = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numUniqueFamilies);
    numUniqueQualifiers = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numUniqueQualifiers);
    numUniqueTags = UVIntTool.getInt(buf, position);
    position += UVIntTool.numBytes(numUniqueTags);
  }

  // TODO method that can read directly from ByteBuffer instead of InputStream

  /*************** methods *************************/

  public int getKeyValueTypeWidth() {
    return allSameType ? 0 : 1;
  }

  public byte getIncludesMvccVersionByte() {
    return includesMvccVersion ? (byte) 1 : (byte) 0;
  }

  public void setIncludesMvccVersion(byte includesMvccVersionByte) {
    includesMvccVersion = includesMvccVersionByte != 0;
  }

  public byte getAllSameTypeByte() {
    return allSameType ? (byte) 1 : (byte) 0;
  }

  public void setAllSameType(byte allSameTypeByte) {
    allSameType = allSameTypeByte != 0;
  }

  public boolean isAllSameTimestamp() {
    return timestampIndexWidth == 0;
  }

  public boolean isAllSameMvccVersion() {
    return mvccVersionIndexWidth == 0;
  }

  public void setTimestampFields(LongEncoder encoder) {
    this.minTimestamp = encoder.getMin();
    this.timestampIndexWidth = encoder.getBytesPerIndex();
    this.timestampDeltaWidth = encoder.getBytesPerDelta();
    this.numTimestampBytes = encoder.getTotalCompressedBytes();
  }

  public void setMvccVersionFields(LongEncoder encoder) {
    this.minMvccVersion = encoder.getMin();
    this.mvccVersionIndexWidth = encoder.getBytesPerIndex();
    this.mvccVersionDeltaWidth = encoder.getBytesPerDelta();
    this.numMvccVersionBytes = encoder.getTotalCompressedBytes();
  }

  /*************** Object methods *************************/

  /**
   * Generated by Eclipse
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj)
      return true;
    if (obj == null)
      return false;
    if (getClass() != obj.getClass())
      return false;
    PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj;
    if (allSameType != other.allSameType)
      return false;
    if (allTypes != other.allTypes)
      return false;
    if (bufferOffset != other.bufferOffset)
      return false;
    if (valueLengthWidth != other.valueLengthWidth)
      return false;
    if (valueOffsetWidth != other.valueOffsetWidth)
      return false;
    if (familyOffsetWidth != other.familyOffsetWidth)
      return false;
    if (includesMvccVersion != other.includesMvccVersion)
      return false;
    if (maxQualifierLength != other.maxQualifierLength)
      return false;
    if (maxTagsLength != other.maxTagsLength)
      return false;
    if (maxRowLength != other.maxRowLength)
      return false;
    if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth)
      return false;
    if (mvccVersionIndexWidth != other.mvccVersionIndexWidth)
      return false;
    if (minMvccVersion != other.minMvccVersion)
      return false;
    if (minTimestamp != other.minTimestamp)
      return false;
    if (nextNodeOffsetWidth != other.nextNodeOffsetWidth)
      return false;
    if (numValueBytes != other.numValueBytes)
      return false;
    if (numFamilyBytes != other.numFamilyBytes)
      return false;
    if (numMvccVersionBytes != other.numMvccVersionBytes)
      return false;
    if (numMetaBytes != other.numMetaBytes)
      return false;
    if (numQualifierBytes != other.numQualifierBytes)
      return false;
    if (numTagsBytes != other.numTagsBytes)
      return false;
    if (numRowBytes != other.numRowBytes)
      return false;
    if (numTimestampBytes != other.numTimestampBytes)
      return false;
    if (numUniqueFamilies != other.numUniqueFamilies)
      return false;
    if (numUniqueQualifiers != other.numUniqueQualifiers)
      return false;
    if (numUniqueTags != other.numUniqueTags)
      return false;
    if (numUniqueRows != other.numUniqueRows)
      return false;
    if (numKeyValueBytes != other.numKeyValueBytes)
      return false;
    if (qualifierOffsetWidth != other.qualifierOffsetWidth)
      return false;
    if (tagsOffsetWidth != other.tagsOffsetWidth)
      return false;
    if (rowTreeDepth != other.rowTreeDepth)
      return false;
    if (timestampDeltaWidth != other.timestampDeltaWidth)
      return false;
    if (timestampIndexWidth != other.timestampIndexWidth)
      return false;
    if (version != other.version)
      return false;
    return true;
  }

  /**
   * Generated by Eclipse
   */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + (allSameType ? 1231 : 1237);
    result = prime * result + allTypes;
    result = prime * result + bufferOffset;
    result = prime * result + valueLengthWidth;
    result = prime * result + valueOffsetWidth;
    result = prime * result + familyOffsetWidth;
    result = prime * result + (includesMvccVersion ? 1231 : 1237);
    result = prime * result + maxQualifierLength;
    result = prime * result + maxTagsLength;
    result = prime * result + maxRowLength;
    result = prime * result + mvccVersionDeltaWidth;
    result = prime * result + mvccVersionIndexWidth;
    result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32));
    result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32));
    result = prime * result + nextNodeOffsetWidth;
    result = prime * result + numValueBytes;
    result = prime * result + numFamilyBytes;
    result = prime * result + numMvccVersionBytes;
    result = prime * result + numMetaBytes;
    result = prime * result + numQualifierBytes;
    result = prime * result + numTagsBytes;
    result = prime * result + numRowBytes;
    result = prime * result + numTimestampBytes;
    result = prime * result + numUniqueFamilies;
    result = prime * result + numUniqueQualifiers;
    result = prime * result + numUniqueTags;
    result = prime * result + numUniqueRows;
    result = prime * result + numKeyValueBytes;
    result = prime * result + qualifierOffsetWidth;
    result = prime * result + tagsOffsetWidth;
    result = prime * result + rowTreeDepth;
    result = prime * result + timestampDeltaWidth;
    result = prime * result + timestampIndexWidth;
    result = prime * result + version;
    return result;
  }

  /**
   * Generated by Eclipse
   */
  @Override
  public String toString() {
    StringBuilder builder = new StringBuilder();
    builder.append("PtBlockMeta [bufferOffset=");
    builder.append(bufferOffset);
    builder.append(", version=");
    builder.append(version);
    builder.append(", numMetaBytes=");
    builder.append(numMetaBytes);
    builder.append(", numKeyValueBytes=");
    builder.append(numKeyValueBytes);
    builder.append(", includesMvccVersion=");
    builder.append(includesMvccVersion);
    builder.append(", numRowBytes=");
    builder.append(numRowBytes);
    builder.append(", numFamilyBytes=");
    builder.append(numFamilyBytes);
    builder.append(", numQualifierBytes=");
    builder.append(numQualifierBytes);
    builder.append(", numTimestampBytes=");
    builder.append(numTimestampBytes);
    builder.append(", numMvccVersionBytes=");
    builder.append(numMvccVersionBytes);
    builder.append(", numValueBytes=");
    builder.append(numValueBytes);
    builder.append(", numTagBytes=");
    builder.append(numTagsBytes);
    builder.append(", nextNodeOffsetWidth=");
    builder.append(nextNodeOffsetWidth);
    builder.append(", familyOffsetWidth=");
    builder.append(familyOffsetWidth);
    builder.append(", qualifierOffsetWidth=");
    builder.append(qualifierOffsetWidth);
    builder.append(", tagOffsetWidth=");
    builder.append(tagsOffsetWidth);
    builder.append(", timestampIndexWidth=");
    builder.append(timestampIndexWidth);
    builder.append(", mvccVersionIndexWidth=");
    builder.append(mvccVersionIndexWidth);
    builder.append(", valueOffsetWidth=");
    builder.append(valueOffsetWidth);
    builder.append(", valueLengthWidth=");
    builder.append(valueLengthWidth);
    builder.append(", rowTreeDepth=");
    builder.append(rowTreeDepth);
    builder.append(", maxRowLength=");
    builder.append(maxRowLength);
    builder.append(", maxQualifierLength=");
    builder.append(maxQualifierLength);
    builder.append(", maxTagLength=");
    builder.append(maxTagsLength);
    builder.append(", minTimestamp=");
    builder.append(minTimestamp);
    builder.append(", timestampDeltaWidth=");
    builder.append(timestampDeltaWidth);
    builder.append(", minMvccVersion=");
    builder.append(minMvccVersion);
    builder.append(", mvccVersionDeltaWidth=");
    builder.append(mvccVersionDeltaWidth);
    builder.append(", allSameType=");
    builder.append(allSameType);
    builder.append(", allTypes=");
    builder.append(allTypes);
    builder.append(", numUniqueRows=");
    builder.append(numUniqueRows);
    builder.append(", numUniqueFamilies=");
    builder.append(numUniqueFamilies);
    builder.append(", numUniqueQualifiers=");
    builder.append(numUniqueQualifiers);
    builder.append(", numUniqueTags=");
    builder.append(numUniqueTags);
    builder.append("]");
    return builder.toString();
  }

  /************** absolute getters *******************/

  public int getAbsoluteRowOffset() {
    return getBufferOffset() + numMetaBytes;
  }

  public int getAbsoluteFamilyOffset() {
    return getAbsoluteRowOffset() + numRowBytes;
  }

  public int getAbsoluteQualifierOffset() {
    return getAbsoluteFamilyOffset() + numFamilyBytes;
  }

  public int getAbsoluteTagsOffset() {
    return getAbsoluteQualifierOffset() + numQualifierBytes;
  }

  public int getAbsoluteTimestampOffset() {
    return getAbsoluteTagsOffset() + numTagsBytes;
  }

  public int getAbsoluteMvccVersionOffset() {
    return getAbsoluteTimestampOffset() + numTimestampBytes;
  }

  public int getAbsoluteValueOffset() {
    return getAbsoluteMvccVersionOffset() + numMvccVersionBytes;
  }

  /*************** get/set ***************************/

  public int getTimestampDeltaWidth() {
    return timestampDeltaWidth;
  }

  public void setTimestampDeltaWidth(int timestampDeltaWidth) {
    this.timestampDeltaWidth = timestampDeltaWidth;
  }

  public int getValueOffsetWidth() {
    return valueOffsetWidth;
  }

  public int getTagsOffsetWidth() {
    return tagsOffsetWidth;
  }

  public void setValueOffsetWidth(int dataOffsetWidth) {
    this.valueOffsetWidth = dataOffsetWidth;
  }

  public void setTagsOffsetWidth(int dataOffsetWidth) {
    this.tagsOffsetWidth = dataOffsetWidth;
  }

  public int getValueLengthWidth() {
    return valueLengthWidth;
  }

  public void setValueLengthWidth(int dataLengthWidth) {
    this.valueLengthWidth = dataLengthWidth;
  }

  public int getMaxRowLength() {
    return maxRowLength;
  }

  public void setMaxRowLength(int maxRowLength) {
    this.maxRowLength = maxRowLength;
  }

  public long getMinTimestamp() {
    return minTimestamp;
  }

  public void setMinTimestamp(long minTimestamp) {
    this.minTimestamp = minTimestamp;
  }

  public byte getAllTypes() {
    return allTypes;
  }

  public void setAllTypes(byte allTypes) {
    this.allTypes = allTypes;
  }

  public boolean isAllSameType() {
    return allSameType;
  }

  public void setAllSameType(boolean allSameType) {
    this.allSameType = allSameType;
  }

  public int getNextNodeOffsetWidth() {
    return nextNodeOffsetWidth;
  }

  public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) {
    this.nextNodeOffsetWidth = nextNodeOffsetWidth;
  }

  public int getNumRowBytes() {
    return numRowBytes;
  }

  public void setNumRowBytes(int numRowBytes) {
    this.numRowBytes = numRowBytes;
  }

  public int getNumTimestampBytes() {
    return numTimestampBytes;
  }

  public void setNumTimestampBytes(int numTimestampBytes) {
    this.numTimestampBytes = numTimestampBytes;
  }

  public int getNumValueBytes() {
    return numValueBytes;
  }

  public int getNumTagsBytes() {
    return numTagsBytes;
  }

  public void setNumTagsBytes(int numTagBytes) {
    this.numTagsBytes = numTagBytes;
  }

  public void setNumValueBytes(int numValueBytes) {
    this.numValueBytes = numValueBytes;
  }

  public int getNumMetaBytes() {
    return numMetaBytes;
  }

  public void setNumMetaBytes(int numMetaBytes) {
    this.numMetaBytes = numMetaBytes;
  }

  public int getBufferOffset() {
    return bufferOffset;
  }

  public void setBufferOffset(int bufferOffset) {
    this.bufferOffset = bufferOffset;
  }

  public int getNumKeyValueBytes() {
    return numKeyValueBytes;
  }

  public void setNumKeyValueBytes(int numKeyValueBytes) {
    this.numKeyValueBytes = numKeyValueBytes;
  }

  public int getRowTreeDepth() {
    return rowTreeDepth;
  }

  public void setRowTreeDepth(int rowTreeDepth) {
    this.rowTreeDepth = rowTreeDepth;
  }

  public int getNumMvccVersionBytes() {
    return numMvccVersionBytes;
  }

  public void setNumMvccVersionBytes(int numMvccVersionBytes) {
    this.numMvccVersionBytes = numMvccVersionBytes;
  }

  public int getMvccVersionDeltaWidth() {
    return mvccVersionDeltaWidth;
  }

  public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) {
    this.mvccVersionDeltaWidth = mvccVersionDeltaWidth;
  }

  public long getMinMvccVersion() {
    return minMvccVersion;
  }

  public void setMinMvccVersion(long minMvccVersion) {
    this.minMvccVersion = minMvccVersion;
  }

  public int getNumFamilyBytes() {
    return numFamilyBytes;
  }

  public void setNumFamilyBytes(int numFamilyBytes) {
    this.numFamilyBytes = numFamilyBytes;
  }

  public int getFamilyOffsetWidth() {
    return familyOffsetWidth;
  }

  public void setFamilyOffsetWidth(int familyOffsetWidth) {
    this.familyOffsetWidth = familyOffsetWidth;
  }

  public int getNumUniqueRows() {
    return numUniqueRows;
  }

  public void setNumUniqueRows(int numUniqueRows) {
    this.numUniqueRows = numUniqueRows;
  }

  public int getNumUniqueFamilies() {
    return numUniqueFamilies;
  }

  public void setNumUniqueFamilies(int numUniqueFamilies) {
    this.numUniqueFamilies = numUniqueFamilies;
  }

  public int getNumUniqueQualifiers() {
    return numUniqueQualifiers;
  }

  public void setNumUniqueQualifiers(int numUniqueQualifiers) {
    this.numUniqueQualifiers = numUniqueQualifiers;
  }

  public void setNumUniqueTags(int numUniqueTags) {
    this.numUniqueTags = numUniqueTags;
  }

  public int getNumUniqueTags() {
    return numUniqueTags;
  }

  public int getNumQualifierBytes() {
    return numQualifierBytes;
  }

  public void setNumQualifierBytes(int numQualifierBytes) {
    this.numQualifierBytes = numQualifierBytes;
  }

  public int getQualifierOffsetWidth() {
    return qualifierOffsetWidth;
  }

  public void setQualifierOffsetWidth(int qualifierOffsetWidth) {
    this.qualifierOffsetWidth = qualifierOffsetWidth;
  }

  public int getMaxQualifierLength() {
    return maxQualifierLength;
  }

  // TODO : decide on some max value for this ? INTEGER_MAX?
  public void setMaxQualifierLength(int maxQualifierLength) {
    this.maxQualifierLength = maxQualifierLength;
  }

  public int getMaxTagsLength() {
    return this.maxTagsLength;
  }

  public void setMaxTagsLength(int maxTagLength) {
    this.maxTagsLength = maxTagLength;
  }

  public int getTimestampIndexWidth() {
    return timestampIndexWidth;
  }

  public void setTimestampIndexWidth(int timestampIndexWidth) {
    this.timestampIndexWidth = timestampIndexWidth;
  }

  public int getMvccVersionIndexWidth() {
    return mvccVersionIndexWidth;
  }

  public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) {
    this.mvccVersionIndexWidth = mvccVersionIndexWidth;
  }

  public int getVersion() {
    return version;
  }

  public void setVersion(int version) {
    this.version = version;
  }

  public boolean isIncludesMvccVersion() {
    return includesMvccVersion;
  }

  public void setIncludesMvccVersion(boolean includesMvccVersion) {
    this.includesMvccVersion = includesMvccVersion;
  }

}
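PrefixTreeBlockMeta above writes almost every header field through UVIntTool/UVLongTool, so small counts cost one byte each and the header shrinks with the block; the getAbsolute*Offset chain then derives each section's start by summing the preceding section lengths. A sketch of an unsigned varint in that spirit (assuming the common 7-bits-per-byte continuation encoding; UVIntTool's exact wire format may differ):

// Hedged sketch of an unsigned variable-length integer, base-128.
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

final class UVarintSketch {
  /** Number of bytes the value occupies at 7 payload bits per byte. */
  static int numBytes(long value) {
    int n = 1;
    while ((value >>>= 7) != 0) {
      ++n;
    }
    return n;
  }

  /** Little-endian base-128: high bit set on every byte except the last. */
  static void writeBytes(long value, OutputStream os) throws IOException {
    while ((value & ~0x7FL) != 0) {
      os.write((int) ((value & 0x7F) | 0x80));
      value >>>= 7;
    }
    os.write((int) value);
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    writeBytes(300, os);                            // encodes as 0xAC 0x02
    System.out.println(os.size() == numBytes(300)); // true: 2 bytes
  }
}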
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java
+++ /dev/null
@@ -1,216 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.CellComparatorImpl.MetaCellComparator;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.codec.prefixtree.encode.EncoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.EncodingState;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.io.WritableUtils;

/**
 * <p>
 * This class is created via reflection in the DataBlockEncoding enum. Update the enum if the
 * class name or package changes.
 * </p>
 * PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point
 * for PrefixTree encoding and decoding. Encoding is delegated to instances of
 * {@link PrefixTreeEncoder}, and decoding is delegated to instances of
 * {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher}.
 * Encoder and decoder instances are created and recycled by the static EncoderFactory and
 * DecoderFactory.
 */
@InterfaceAudience.Private
public class PrefixTreeCodec implements DataBlockEncoder {

  /**
   * no-arg constructor for reflection
   */
  public PrefixTreeCodec() {
  }

  @Override
  public ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
      throws IOException {
    return decodeKeyValues(source, 0, 0, decodingCtx);
  }

  /**
   * I don't think this method is called during normal HBase operation, so efficiency is not
   * important.
   */
  public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
      int skipLastBytes, HFileBlockDecodingContext decodingCtx) throws IOException {
    ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source); // waste
    sourceAsBuffer.mark();
    PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(new SingleByteBuff(sourceAsBuffer));
    sourceAsBuffer.rewind();
    int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes();
    byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader];
    ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader);
    result.rewind();
    CellSearcher searcher = null;
    try {
      boolean includesMvcc = decodingCtx.getHFileContext().isIncludesMvcc();
      searcher = DecoderFactory.checkOut(new SingleByteBuff(sourceAsBuffer), includesMvcc);
      while (searcher.advance()) {
        KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current());
        // needs to be modified for DirectByteBuffers. no existing methods to
        // write VLongs to byte[]
        int offset = result.arrayOffset() + result.position();
        System.arraycopy(currentCell.getBuffer(), currentCell.getOffset(), result.array(), offset,
            currentCell.getLength());
        int keyValueLength = KeyValueUtil.length(currentCell);
        ByteBufferUtils.skip(result, keyValueLength);
        offset += keyValueLength;
        if (includesMvcc) {
          ByteBufferUtils.writeVLong(result, currentCell.getSequenceId());
        }
      }
      result.position(result.limit()); // make it appear as if we were appending
      return result;
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Override
  public Cell getFirstKeyCellInBlock(ByteBuff block) {
    block.rewind();
    PrefixTreeArraySearcher searcher = null;
    try {
      // should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will
      searcher = DecoderFactory.checkOut(block, true);
      if (!searcher.positionAtFirstCell()) {
        return null;
      }
      return searcher.current();
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Override
  public HFileBlockEncodingContext newDataBlockEncodingContext(
      DataBlockEncoding encoding, byte[] header, HFileContext meta) {
    if (DataBlockEncoding.PREFIX_TREE != encoding) {
      // i'm not sure why encoding is in the interface. Each encoder implementation should
      // probably know its encoding type
      throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported");
    }
    return new HFileBlockDefaultEncodingContext(encoding, header, meta);
  }

  @Override
  public HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta) {
    return new HFileBlockDefaultDecodingContext(meta);
  }

  /**
   * Is this the correct handling of an illegal comparator? How do we prevent that from getting
   * all the way to this point?
   */
  @Override
  public EncodedSeeker createSeeker(CellComparator comparator,
      HFileBlockDecodingContext decodingCtx) {
    if (comparator instanceof MetaCellComparator) {
      throw new IllegalArgumentException(
          "DataBlockEncoding.PREFIX_TREE not compatible with hbase:meta " + "table");
    }

    return new PrefixTreeSeeker(decodingCtx.getHFileContext().isIncludesMvcc());
  }

  @Override
  public int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
      throws IOException {
    PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState();
    PrefixTreeEncoder builder = state.builder;
    builder.write(cell);
    int size = KeyValueUtil.length(cell);
    if (encodingCtx.getHFileContext().isIncludesMvcc()) {
      size += WritableUtils.getVIntSize(cell.getSequenceId());
    }
    return size;
  }

  private static class PrefixTreeEncodingState extends EncodingState {
    PrefixTreeEncoder builder = null;
  }

  @Override
  public void startBlockEncoding(HFileBlockEncodingContext blkEncodingCtx, DataOutputStream out)
      throws IOException {
    if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
      throw new IOException(this.getClass().getName() + " only accepts "
          + HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context.");
    }

    HFileBlockDefaultEncodingContext encodingCtx =
        (HFileBlockDefaultEncodingContext) blkEncodingCtx;
    encodingCtx.prepareEncoding(out);

    PrefixTreeEncoder builder = EncoderFactory.checkOut(out, encodingCtx.getHFileContext()
        .isIncludesMvcc());
    PrefixTreeEncodingState state = new PrefixTreeEncodingState();
    state.builder = builder;
    blkEncodingCtx.setEncodingState(state);
  }

  @Override
  public void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
      byte[] uncompressedBytesWithHeader) throws IOException {
    PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState();
    PrefixTreeEncoder builder = state.builder;
    builder.flush();
    EncoderFactory.checkIn(builder);
    // do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE?
    if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
      encodingCtx.postEncoding(BlockType.ENCODED_DATA);
    } else {
      encodingCtx.postEncoding(BlockType.DATA);
    }
  }
}
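Note how the codec above never constructs encoders or searchers directly: EncoderFactory/DecoderFactory hand instances out via checkOut and take them back via checkIn, and every call site wraps the pair in try/finally. A minimal sketch of that checkOut/checkIn recycling idiom (hypothetical generic pool; the real factories are HBase-internal and type-specific):

import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.function.Supplier;

final class RecyclingFactory<T> {
  private final ConcurrentLinkedQueue<T> pool = new ConcurrentLinkedQueue<>();
  private final Supplier<T> constructor;

  RecyclingFactory(Supplier<T> constructor) {
    this.constructor = constructor;
  }

  /** Reuse a pooled instance when available, otherwise allocate a new one. */
  T checkOut() {
    T instance = pool.poll();
    return instance != null ? instance : constructor.get();
  }

  /** Tolerates null so callers can check in from finally blocks unconditionally. */
  void checkIn(T instance) {
    if (instance != null) {
      pool.offer(instance);
    }
  }
}

Call sites then mirror decodeKeyValues above: checkOut inside the try block, checkIn in finally, with checkIn accepting the null that results when checkOut itself threw.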
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java
+++ /dev/null
@@ -1,586 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree;

import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.ByteBufferCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.SettableSequenceId;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;

/**
 * These methods have the same definition as any implementation of the EncodedSeeker.
 *
 * In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It
 * currently returns a new KeyValue object each time getKeyValue is called. This is not horrible,
 * but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in
 * the data from the PrefixTreeCell. It is somewhat heavyweight right now.
 */
@InterfaceAudience.Private
public class PrefixTreeSeeker implements EncodedSeeker {

  protected boolean includeMvccVersion;
  protected PrefixTreeArraySearcher ptSearcher;

  public PrefixTreeSeeker(boolean includeMvccVersion) {
    this.includeMvccVersion = includeMvccVersion;
  }

  @Override
  public void setCurrentBuffer(ByteBuff fullBlockBuffer) {
    ptSearcher = DecoderFactory.checkOut(fullBlockBuffer, includeMvccVersion);
    rewind();
  }

  /**
   * <p>
   * Currently unused.
   * </p>
   * TODO: performance leak; we should reuse the searchers, but HBase does not currently have a
   * hook where this can be called
   */
  public void releaseCurrentSearcher() {
    DecoderFactory.checkIn(ptSearcher);
  }

  @Override
  public Cell getKey() {
    return ptSearcher.current();
  }

  @Override
  public ByteBuffer getValueShallowCopy() {
    return PrivateCellUtil.getValueBufferShallowCopy(ptSearcher.current());
  }

  /**
   * currently must do deep copy into new array
   */
  @Override
  public Cell getCell() {
    // The PrefixTreeCell is of type ByteBufferCell, and the value part of the cell
    // determines whether we are an offheap cell or an onheap cell. All other parts of the
    // cell - row, fam and col - are all represented as onheap byte[]
    ByteBufferCell cell = (ByteBufferCell) ptSearcher.current();
    if (cell == null) {
      return null;
    }
    // Use the ByteBuffered cell to see if the Cell is onheap or offheap
    if (cell.getValueByteBuffer().hasArray()) {
      return new OnheapPrefixTreeCell(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
          cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
          cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
          cell.getValueArray(), cell.getValueOffset(), cell.getValueLength(), cell.getTagsArray(),
          cell.getTagsOffset(), cell.getTagsLength(), cell.getTimestamp(), cell.getTypeByte(),
          cell.getSequenceId());
    } else {
      return new OffheapPrefixTreeCell(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
          cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
          cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
          cell.getValueByteBuffer(), cell.getValuePosition(), cell.getValueLength(),
          cell.getTagsArray(), cell.getTagsOffset(), cell.getTagsLength(), cell.getTimestamp(),
          cell.getTypeByte(), cell.getSequenceId());
    }
  }

  /**
   * <p>
   * Currently unused.
   * </p><p>
   * A nice, lightweight reference, though the underlying cell is transient. This method may return
   * the same reference to the backing PrefixTreeCell repeatedly, while other implementations may
   * return a different reference for each Cell.
   * </p>
   * The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to
   * use this method instead of the getKeyValue() methods above.
   */
  public Cell get() {
    return ptSearcher.current();
  }

  @Override
  public void rewind() {
    ptSearcher.positionAtFirstCell();
  }

  @Override
  public boolean next() {
    return ptSearcher.advance();
  }

  public boolean advance() {
    return ptSearcher.advance();
  }

  private static final boolean USE_POSITION_BEFORE = false;

  /*
   * Support both of these options since the underlying PrefixTree supports
   * both. Possibly expand the EncodedSeeker to utilize them both.
   */

  protected int seekToOrBeforeUsingPositionAtOrBefore(Cell kv, boolean seekBefore) {
    // this does a deep copy of the key byte[] because the CellSearcher
    // interface wants a Cell
    CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv);

    if (CellScannerPosition.AT == position) {
      if (seekBefore) {
        ptSearcher.previous();
        return 1;
      }
      return 0;
    }

    return 1;
  }

  protected int seekToOrBeforeUsingPositionAtOrAfter(Cell kv, boolean seekBefore) {
    // should probably switch this to use the seekForwardToOrBefore method
    CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv);

    if (CellScannerPosition.AT == position) {
      if (seekBefore) {
        ptSearcher.previous();
        return 1;
      }
      return 0;
    }

    if (CellScannerPosition.AFTER == position) {
      if (!ptSearcher.isBeforeFirst()) {
        ptSearcher.previous();
      }
      return 1;
    }

    if (position == CellScannerPosition.AFTER_LAST) {
      if (seekBefore) {
        ptSearcher.previous();
      }
      return 1;
    }

    throw new RuntimeException("unexpected CellScannerPosition:" + position);
  }

  @Override
  public int seekToKeyInBlock(Cell key, boolean forceBeforeOnExactMatch) {
    if (USE_POSITION_BEFORE) {
      return seekToOrBeforeUsingPositionAtOrBefore(key, forceBeforeOnExactMatch);
    } else {
      return seekToOrBeforeUsingPositionAtOrAfter(key, forceBeforeOnExactMatch);
    }
  }

  @Override
  public int compareKey(CellComparator comparator, Cell key) {
    return comparator.compare(key, ptSearcher.current());
  }

  /**
   * Cloned version of the PrefixTreeCell where, except for the value part, the rest
   * of the key part is deep copied
   */
  private static class OnheapPrefixTreeCell implements Cell, SettableSequenceId, HeapSize {
    private static final long FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT
        + (5 * ClassSize.REFERENCE) + (2 * Bytes.SIZEOF_LONG) + (4 * Bytes.SIZEOF_INT)
        + (Bytes.SIZEOF_SHORT) + (2 * Bytes.SIZEOF_BYTE) + (5 * ClassSize.ARRAY));
    private byte[] row;
    private short rowLength;
    private byte[] fam;
    private byte famLength;
    private byte[] qual;
    private int qualLength;
    private byte[] val;
    private int valOffset;
    private int valLength;
    private byte[] tag;
    private int tagsLength;
    private long ts;
    private long seqId;
    private byte type;

    public OnheapPrefixTreeCell(byte[] row, int rowOffset, short rowLength, byte[] fam,
|
||||
int famOffset, byte famLength, byte[] qual, int qualOffset, int qualLength, byte[] val,
|
||||
int valOffset, int valLength, byte[] tag, int tagOffset, int tagLength, long ts, byte type,
|
||||
long seqId) {
|
||||
this.row = new byte[rowLength];
|
||||
System.arraycopy(row, rowOffset, this.row, 0, rowLength);
|
||||
this.rowLength = rowLength;
|
||||
this.fam = new byte[famLength];
|
||||
System.arraycopy(fam, famOffset, this.fam, 0, famLength);
|
||||
this.famLength = famLength;
|
||||
this.qual = new byte[qualLength];
|
||||
System.arraycopy(qual, qualOffset, this.qual, 0, qualLength);
|
||||
this.qualLength = qualLength;
|
||||
this.tag = new byte[tagLength];
|
||||
System.arraycopy(tag, tagOffset, this.tag, 0, tagLength);
|
||||
this.tagsLength = tagLength;
|
||||
this.val = val;
|
||||
this.valLength = valLength;
|
||||
this.valOffset = valOffset;
|
||||
this.ts = ts;
|
||||
this.seqId = seqId;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSequenceId(long seqId) {
|
||||
this.seqId = seqId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getRowArray() {
|
||||
return this.row;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getRowOffset() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public short getRowLength() {
|
||||
return this.rowLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getFamilyArray() {
|
||||
return this.fam;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFamilyOffset() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getFamilyLength() {
|
||||
return this.famLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getQualifierArray() {
|
||||
return this.qual;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getQualifierOffset() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getQualifierLength() {
|
||||
return this.qualLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getTimestamp() {
|
||||
return ts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getTypeByte() {
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSequenceId() {
|
||||
return seqId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getValueArray() {
|
||||
return val;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueOffset() {
|
||||
return this.valOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueLength() {
|
||||
return this.valLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getTagsArray() {
|
||||
return this.tag;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTagsOffset() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTagsLength() {
|
||||
return this.tagsLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String row = Bytes.toStringBinary(getRowArray(), getRowOffset(), getRowLength());
|
||||
String family = Bytes.toStringBinary(getFamilyArray(), getFamilyOffset(), getFamilyLength());
|
||||
String qualifier = Bytes.toStringBinary(getQualifierArray(), getQualifierOffset(),
|
||||
getQualifierLength());
|
||||
String timestamp = String.valueOf((getTimestamp()));
|
||||
return row + "/" + family + (family != null && family.length() > 0 ? ":" : "") + qualifier
|
||||
+ "/" + timestamp + "/" + Type.codeToType(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long heapSize() {
|
||||
return FIXED_OVERHEAD + rowLength + famLength + qualLength + valLength + tagsLength;
|
||||
}
|
||||
}
|
||||
|
||||
private static class OffheapPrefixTreeCell extends ByteBufferCell implements Cell,
|
||||
SettableSequenceId, HeapSize {
|
||||
private static final long FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT
|
||||
+ (5 * ClassSize.REFERENCE) + (2 * Bytes.SIZEOF_LONG) + (4 * Bytes.SIZEOF_INT)
|
||||
+ (Bytes.SIZEOF_SHORT) + (2 * Bytes.SIZEOF_BYTE) + (5 * ClassSize.BYTE_BUFFER));
|
||||
private ByteBuffer rowBuff;
|
||||
private short rowLength;
|
||||
private ByteBuffer famBuff;
|
||||
private byte famLength;
|
||||
private ByteBuffer qualBuff;
|
||||
private int qualLength;
|
||||
private ByteBuffer val;
|
||||
private int valOffset;
|
||||
private int valLength;
|
||||
private ByteBuffer tagBuff;
|
||||
private int tagsLength;
|
||||
private long ts;
|
||||
private long seqId;
|
||||
private byte type;
|
||||
public OffheapPrefixTreeCell(byte[] row, int rowOffset, short rowLength, byte[] fam,
|
||||
int famOffset, byte famLength, byte[] qual, int qualOffset, int qualLength, ByteBuffer val,
|
||||
int valOffset, int valLength, byte[] tag, int tagOffset, int tagLength, long ts, byte type,
|
||||
long seqId) {
|
||||
byte[] tmpRow = new byte[rowLength];
|
||||
System.arraycopy(row, rowOffset, tmpRow, 0, rowLength);
|
||||
this.rowBuff = ByteBuffer.wrap(tmpRow);
|
||||
this.rowLength = rowLength;
|
||||
byte[] tmpFam = new byte[famLength];
|
||||
System.arraycopy(fam, famOffset, tmpFam, 0, famLength);
|
||||
this.famBuff = ByteBuffer.wrap(tmpFam);
|
||||
this.famLength = famLength;
|
||||
byte[] tmpQual = new byte[qualLength];
|
||||
System.arraycopy(qual, qualOffset, tmpQual, 0, qualLength);
|
||||
this.qualBuff = ByteBuffer.wrap(tmpQual);
|
||||
this.qualLength = qualLength;
|
||||
byte[] tmpTag = new byte[tagLength];
|
||||
System.arraycopy(tag, tagOffset, tmpTag, 0, tagLength);
|
||||
this.tagBuff = ByteBuffer.wrap(tmpTag);
|
||||
this.tagsLength = tagLength;
|
||||
this.val = val;
|
||||
this.valLength = valLength;
|
||||
this.valOffset = valOffset;
|
||||
this.ts = ts;
|
||||
this.seqId = seqId;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSequenceId(long seqId) {
|
||||
this.seqId = seqId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getRowArray() {
|
||||
return this.rowBuff.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getRowOffset() {
|
||||
return getRowPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public short getRowLength() {
|
||||
return this.rowLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getFamilyArray() {
|
||||
return this.famBuff.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFamilyOffset() {
|
||||
return getFamilyPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getFamilyLength() {
|
||||
return this.famLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getQualifierArray() {
|
||||
return this.qualBuff.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getQualifierOffset() {
|
||||
return getQualifierPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getQualifierLength() {
|
||||
return this.qualLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getTimestamp() {
|
||||
return ts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getTypeByte() {
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSequenceId() {
|
||||
return seqId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getValueArray() {
|
||||
byte[] tmpVal = new byte[valLength];
|
||||
ByteBufferUtils.copyFromBufferToArray(tmpVal, val, valOffset, 0, valLength);
|
||||
return tmpVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueOffset() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueLength() {
|
||||
return this.valLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getTagsArray() {
|
||||
return this.tagBuff.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTagsOffset() {
|
||||
return getTagsPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTagsLength() {
|
||||
return this.tagsLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteBuffer getRowByteBuffer() {
|
||||
return this.rowBuff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getRowPosition() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteBuffer getFamilyByteBuffer() {
|
||||
return this.famBuff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFamilyPosition() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteBuffer getQualifierByteBuffer() {
|
||||
return this.qualBuff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getQualifierPosition() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteBuffer getTagsByteBuffer() {
|
||||
return this.tagBuff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTagsPosition() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteBuffer getValueByteBuffer() {
|
||||
return this.val;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValuePosition() {
|
||||
return this.valOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long heapSize() {
|
||||
return FIXED_OVERHEAD;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String row = Bytes.toStringBinary(getRowArray(), getRowOffset(), getRowLength());
|
||||
String family = Bytes.toStringBinary(getFamilyArray(), getFamilyOffset(), getFamilyLength());
|
||||
String qualifier = Bytes.toStringBinary(getQualifierArray(), getQualifierOffset(),
|
||||
getQualifierLength());
|
||||
String timestamp = String.valueOf((getTimestamp()));
|
||||
return row + "/" + family + (family != null && family.length() > 0 ? ":" : "") + qualifier
|
||||
+ "/" + timestamp + "/" + Type.codeToType(type);
|
||||
}
|
||||
}
|
||||
}
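
An illustrative sketch, not part of the removed sources: it shows how a caller drives the seeker above, assuming the unencoded block bytes are already in hand as a ByteBuff (obtaining them is elided), and it uses only methods defined in this class. The sketch class name is invented.

package org.apache.hadoop.hbase.codec.prefixtree;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.nio.ByteBuff;

public class PrefixTreeSeekerSketch {
  // Iterate every cell in one PREFIX_TREE-encoded block.
  // setCurrentBuffer() checks a searcher out of the decoder pool and
  // rewinds to the first cell, so getCell() is valid immediately after it.
  static void dumpBlock(ByteBuff fullBlockBuffer) {
    PrefixTreeSeeker seeker = new PrefixTreeSeeker(false); // no mvcc versions
    seeker.setCurrentBuffer(fullBlockBuffer);
    try {
      Cell c = seeker.getCell(); // deep copy, as the javadoc above notes
      while (c != null) {
        System.out.println(c);
        c = seeker.next() ? seeker.getCell() : null;
      }
    } finally {
      seeker.releaseCurrentSearcher(); // return the searcher to the pool
    }
  }
}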
@@ -1,63 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;

/**
 * <p>
 * Pools PrefixTreeArraySearcher objects. Each Searcher can consist of hundreds or thousands of
 * objects and 1 is needed for each HFile during a Get operation. With tens of thousands of
 * Gets/second, reusing these searchers may save a lot of young gen collections.
 * </p>
 * Alternative implementation would be a ByteBufferSearcherPool (not implemented yet).
 */
@InterfaceAudience.Private
public class ArraySearcherPool {

  /**
   * One decoder is needed for each storefile for each Get operation, so we may need hundreds at
   * the same time; however, decoding is a CPU-bound activity, so we should limit this to something
   * in the realm of the maximum reasonable number of active threads.
   */
  private static final Integer MAX_POOL_SIZE = 1000;

  protected Queue<PrefixTreeArraySearcher> pool = new LinkedBlockingQueue<>(MAX_POOL_SIZE);

  public PrefixTreeArraySearcher checkOut(ByteBuff buffer, boolean includesMvccVersion) {
    PrefixTreeArraySearcher searcher = pool.poll(); // will return null if pool is empty
    searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion);
    return searcher;
  }

  public void checkIn(PrefixTreeArraySearcher searcher) {
    searcher.releaseBlockReference();
    pool.offer(searcher);
  }

  @Override
  public String toString() {
    return ("poolSize:" + pool.size());
  }

}
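
A sketch of the borrow/return cycle the pool is built for, by a hypothetical caller (not part of the removed module): one searcher is checked out per storefile per Get, used, then checked back in so its buffers survive for the next operation.

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import org.apache.hadoop.hbase.nio.ByteBuff;

public class ArraySearcherPoolSketch {
  // checkOut() recycles a pooled searcher when one is available; otherwise
  // DecoderFactory.ensureArraySearcherValid() allocates one sized for this
  // block. checkIn() drops the block reference and re-pools the searcher.
  static void scanOneBlock(ArraySearcherPool pool, ByteBuff block) {
    PrefixTreeArraySearcher searcher = pool.checkOut(block, false);
    try {
      while (searcher.advance()) {
        // consume searcher.current() here
      }
    } finally {
      pool.checkIn(searcher);
    }
  }
}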
@@ -1,83 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.nio.ByteBuff;

/**
 * Static wrapper class for the ArraySearcherPool.
 */
@InterfaceAudience.Private
public class DecoderFactory {
  private static final ArraySearcherPool POOL = new ArraySearcherPool();

  //TODO will need a PrefixTreeSearcher on top of CellSearcher
  public static PrefixTreeArraySearcher checkOut(final ByteBuff buffer,
      boolean includeMvccVersion) {
    PrefixTreeArraySearcher searcher = POOL.checkOut(buffer, includeMvccVersion);
    return searcher;
  }

  public static void checkIn(CellSearcher pSearcher) {
    if (pSearcher == null) {
      return;
    }
    if (!(pSearcher instanceof PrefixTreeArraySearcher)) {
      throw new IllegalArgumentException("Cannot return " + pSearcher.getClass() + " to "
          + DecoderFactory.class);
    }
    PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher;
    POOL.checkIn(searcher);
  }


  /**************************** helper ******************************/
  public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuff buffer,
      PrefixTreeArraySearcher searcher, boolean includeMvccVersion) {
    if (searcher == null) {
      PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer);
      searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(),
          blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength(),
          blockMeta.getMaxTagsLength());
      searcher.initOnBlock(blockMeta, buffer, includeMvccVersion);
      return searcher;
    }

    PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta();
    blockMeta.initOnBlock(buffer);
    if (!searcher.areBuffersBigEnough()) {
      int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(),
          searcher.getMaxRowTreeStackNodes());
      int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength());
      int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(),
          searcher.getQualifierBufferLength());
      int tagBufferLength = Math.max(blockMeta.getMaxTagsLength(), searcher.getTagBufferLength());
      searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength,
          qualifierBufferLength, tagBufferLength);
    }
    //this is where we parse the BlockMeta
    searcher.initOnBlock(blockMeta, buffer, includeMvccVersion);
    return searcher;
  }

}
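
For completeness, a hypothetical caller of the static facade (invented class name, not from the removed sources): every checkOut() is paired with a checkIn(), which as shown above tolerates null and rejects foreign CellSearcher implementations.

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import org.apache.hadoop.hbase.nio.ByteBuff;

public class DecoderFactorySketch {
  // The facade hides the pool entirely; this is the same pattern
  // PrefixTreeSeeker uses in setCurrentBuffer()/releaseCurrentSearcher().
  static void withSearcher(ByteBuff block, boolean includeMvccVersion) {
    PrefixTreeArraySearcher searcher = DecoderFactory.checkOut(block, includeMvccVersion);
    try {
      // position and read cells here
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }
}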
@@ -1,145 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.ReversibleCellScanner;

/**
 * Methods for going backwards through a PrefixTree block. This class is split out on its own to
 * simplify the Scanner superclass and Searcher subclass.
 */
@InterfaceAudience.Private
public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements
    ReversibleCellScanner {

  /***************** construct ******************************/

  public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
      int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
    super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
  }


  /***************** Object methods ***************************/

  @Override
  public boolean equals(Object obj) {
    //trivial override to confirm intent (findbugs)
    return super.equals(obj);
  }


  /***************** methods **********************************/

  @Override
  public boolean previous() {
    if (afterLast) {
      afterLast = false;
      positionAtLastCell();
      return true;
    }
    if (beforeFirst) {
      return false;
    }
    if (isFirstCellInRow()) {
      previousRowInternal();
      if (beforeFirst) {
        return false;
      }
      populateLastNonRowFields();
      return true;
    }
    populatePreviousNonRowFields();
    return true;
  }

  @Override
  public boolean previousRow(boolean endOfRow) {
    previousRowInternal();
    if (beforeFirst) {
      return false;
    }
    if (endOfRow) {
      populateLastNonRowFields();
    } else {
      populateFirstNonRowFields();
    }
    return true;
  }

  private boolean previousRowInternal() {
    if (beforeFirst) {
      return false;
    }
    if (afterLast) {
      positionAtLastRow();
      return true;
    }
    if (currentRowNode.hasOccurrences()) {
      discardCurrentRowNode(false);
      if (currentRowNode == null) {
        return false;
      }
    }
    while (!beforeFirst) {
      if (isDirectlyAfterNub()) { //we are about to back up to the nub
        currentRowNode.resetFanIndex(); //sets it to -1, which is before the first leaf
        nubCellsRemain = true; //this positions us on the nub
        return true;
      }
      if (currentRowNode.hasPreviousFanNodes()) {
        followPreviousFan();
        descendToLastRowFromCurrentPosition();
      } else { // keep going up the stack until we find previous fan positions
        discardCurrentRowNode(false);
        if (currentRowNode == null) {
          return false;
        }
      }
      if (currentRowNode.hasOccurrences()) { // escape clause
        currentRowNode.resetFanIndex();
        return true; // found some values
      }
    }
    return false; // went past the beginning
  }

  protected boolean isDirectlyAfterNub() {
    return currentRowNode.isNub() && currentRowNode.getFanIndex() == 0;
  }

  protected void positionAtLastRow() {
    reInitFirstNode();
    descendToLastRowFromCurrentPosition();
  }

  protected void descendToLastRowFromCurrentPosition() {
    while (currentRowNode.hasChildren()) {
      followLastFan();
    }
  }

  protected void positionAtLastCell() {
    positionAtLastRow();
    populateLastNonRowFields();
  }

}
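
For illustration only (invented helper, not from the removed sources): walking a block backwards pairs previous() above with positionAfterLastCell() from the searcher subclass further down; previous() first restores the last cell, then keeps stepping until it falls off the front.

package org.apache.hadoop.hbase.codec.prefixtree.decode;

public class ReverseScanSketch {
  // previous() returns false once the scanner backs up past the first
  // cell, which makes it a natural loop condition for a reverse dump.
  static void reverseDump(PrefixTreeArraySearcher searcher) {
    searcher.positionAfterLastCell(); // defined on the searcher subclass below
    while (searcher.previous()) {
      System.out.println(searcher.current());
    }
  }
}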
@@ -1,528 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hadoop.hbase.codec.prefixtree.decode.row.RowNodeReader;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.nio.ByteBuff;

/**
 * Extends PtCell and manipulates its protected fields. Could alternatively contain a PtCell and
 * call get/set methods.
 *
 * This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This
 * implementation requires that the bytes be in a normal java byte[] for performance. The
 * alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer
 * without copying the whole buffer on-heap.
 */
@InterfaceAudience.Private
public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner {

  /***************** fields ********************************/

  protected PrefixTreeBlockMeta blockMeta;

  protected boolean beforeFirst;
  protected boolean afterLast;

  protected RowNodeReader[] rowNodes;
  protected int rowNodeStackIndex;

  protected RowNodeReader currentRowNode;
  protected ColumnReader familyReader;
  protected ColumnReader qualifierReader;
  protected ColumnReader tagsReader;
  protected TimestampDecoder timestampDecoder;
  protected MvccVersionDecoder mvccVersionDecoder;

  protected boolean nubCellsRemain;
  protected int currentCellIndex;

  /*********************** construct ******************************/

  // pass in blockMeta so we can initialize buffers big enough for all cells in the block
  public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
      int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
    this.rowNodes = new RowNodeReader[rowTreeDepth];
    for (int i = 0; i < rowNodes.length; ++i) {
      rowNodes[i] = new RowNodeReader();
    }
    this.rowBuffer = new byte[rowBufferLength];
    this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH];
    this.familyReader = new ColumnReader(familyBuffer, ColumnNodeType.FAMILY);
    this.qualifierBuffer = new byte[qualifierBufferLength];
    this.tagsBuffer = new byte[tagsBufferLength];
    this.qualifierReader = new ColumnReader(qualifierBuffer, ColumnNodeType.QUALIFIER);
    this.tagsReader = new ColumnReader(tagsBuffer, ColumnNodeType.TAGS);
    this.timestampDecoder = new TimestampDecoder();
    this.mvccVersionDecoder = new MvccVersionDecoder();
  }


  /**************** init helpers ***************************************/

  /**
   * Call when first accessing a block.
   * @return entirely new scanner if false
   */
  public boolean areBuffersBigEnough() {
    if (rowNodes.length < blockMeta.getRowTreeDepth()) {
      return false;
    }
    if (rowBuffer.length < blockMeta.getMaxRowLength()) {
      return false;
    }
    if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) {
      return false;
    }
    if (tagsBuffer.length < blockMeta.getMaxTagsLength()) {
      return false;
    }
    return true;
  }

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block,
      boolean includeMvccVersion) {
    this.block = block;
    this.blockMeta = blockMeta;
    this.familyOffset = familyBuffer.length;
    this.familyReader.initOnBlock(blockMeta, block);
    this.qualifierOffset = qualifierBuffer.length;
    this.qualifierReader.initOnBlock(blockMeta, block);
    this.tagsOffset = tagsBuffer.length;
    this.tagsReader.initOnBlock(blockMeta, block);
    this.timestampDecoder.initOnBlock(blockMeta, block);
    this.mvccVersionDecoder.initOnBlock(blockMeta, block);
    this.includeMvccVersion = includeMvccVersion;
    resetToBeforeFirstEntry();
  }

  // Does this have to be in the CellScanner Interface? TODO
  public void resetToBeforeFirstEntry() {
    beforeFirst = true;
    afterLast = false;
    rowNodeStackIndex = -1;
    currentRowNode = null;
    rowLength = 0;
    familyOffset = familyBuffer.length;
    familyLength = 0;
    qualifierOffset = blockMeta.getMaxQualifierLength();
    qualifierLength = 0;
    nubCellsRemain = false;
    currentCellIndex = -1;
    timestamp = -1L;
    type = DEFAULT_TYPE;
    absoluteValueOffset = 0; //use 0 vs -1 so the cell is valid when value hasn't been initialized
    valueLength = 0; // had it at -1, but that causes null Cell to add up to the wrong length
    tagsOffset = blockMeta.getMaxTagsLength();
    tagsLength = 0;
  }

  /**
   * Call this before putting the scanner back into a pool so it doesn't hold the last used block
   * in memory.
   */
  public void releaseBlockReference() {
    block = null;
  }


  /********************** CellScanner **********************/

  @Override
  public Cell current() {
    if (isOutOfBounds()) {
      return null;
    }
    return (Cell) this;
  }

  /******************* Object methods ************************/

  @Override
  public boolean equals(Object obj) {
    //trivial override to confirm intent (findbugs)
    return super.equals(obj);
  }

  @Override
  public int hashCode() {
    return super.hashCode();
  }

  /**
   * Override PrefixTreeCell.toString() with a check to see if the current cell is valid.
   */
  @Override
  public String toString() {
    Cell currentCell = current();
    if (currentCell == null) {
      return "null";
    }
    return ((PrefixTreeCell) currentCell).getKeyValueString();
  }


  /******************* advance ***************************/

  public boolean positionAtFirstCell() {
    reInitFirstNode();
    return advance();
  }

  @Override
  public boolean advance() {
    if (afterLast) {
      return false;
    }
    if (!hasOccurrences()) {
      resetToBeforeFirstEntry();
    }
    if (beforeFirst || isLastCellInRow()) {
      nextRow();
      if (afterLast) {
        return false;
      }
    } else {
      ++currentCellIndex;
    }

    populateNonRowFields(currentCellIndex);
    return true;
  }


  public boolean nextRow() {
    nextRowInternal();
    if (afterLast) {
      return false;
    }
    populateNonRowFields(currentCellIndex);
    return true;
  }


  /**
   * This method is safe to call when the scanner is not on a fully valid row node, as in the case
   * of a row token miss in the Searcher
   * @return true if we are positioned on a valid row, false if past end of block
   */
  protected boolean nextRowInternal() {
    if (afterLast) {
      return false;
    }
    if (beforeFirst) {
      initFirstNode();
      if (currentRowNode.hasOccurrences()) {
        if (currentRowNode.isNub()) {
          nubCellsRemain = true;
        }
        currentCellIndex = 0;
        return true;
      }
    }
    if (currentRowNode.isLeaf()) {
      discardCurrentRowNode(true);
    }
    while (!afterLast) {
      if (nubCellsRemain) {
        nubCellsRemain = false;
      }
      if (currentRowNode.hasMoreFanNodes()) {
        followNextFan();
        if (currentRowNode.hasOccurrences()) {
          // found some values
          currentCellIndex = 0;
          return true;
        }
      } else {
        discardCurrentRowNode(true);
      }
    }
    return false; // went past the end
  }


  /**************** secondary traversal methods ******************************/

  protected void reInitFirstNode() {
    resetToBeforeFirstEntry();
    initFirstNode();
  }

  protected void initFirstNode() {
    int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset();
    rowNodeStackIndex = 0;
    currentRowNode = rowNodes[0];
    currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure);
    appendCurrentTokenToRowBuffer();
    beforeFirst = false;
  }

  protected void followFirstFan() {
    followFan(0);
  }

  protected void followPreviousFan() {
    int nextFanPosition = currentRowNode.getFanIndex() - 1;
    followFan(nextFanPosition);
  }

  protected void followCurrentFan() {
    int currentFanPosition = currentRowNode.getFanIndex();
    followFan(currentFanPosition);
  }

  protected void followNextFan() {
    int nextFanPosition = currentRowNode.getFanIndex() + 1;
    followFan(nextFanPosition);
  }

  protected void followLastFan() {
    followFan(currentRowNode.getLastFanIndex());
  }

  protected void followFan(int fanIndex) {
    currentRowNode.setFanIndex(fanIndex);
    appendToRowBuffer(currentRowNode.getFanByte(fanIndex));

    int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset()
        + currentRowNode.getNextNodeOffset(fanIndex, blockMeta);
    ++rowNodeStackIndex;

    currentRowNode = rowNodes[rowNodeStackIndex];
    currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure);

    //TODO getToken is spewing garbage
    appendCurrentTokenToRowBuffer();
    if (currentRowNode.isNub()) {
      nubCellsRemain = true;
    }
    currentCellIndex = 0;
  }

  /**
   * @param forwards which marker to set if we overflow
   */
  protected void discardCurrentRowNode(boolean forwards) {
    RowNodeReader rowNodeBeingPopped = currentRowNode;
    --rowNodeStackIndex; // pop it off the stack
    if (rowNodeStackIndex < 0) {
      currentRowNode = null;
      if (forwards) {
        markAfterLast();
      } else {
        markBeforeFirst();
      }
      return;
    }
    popFromRowBuffer(rowNodeBeingPopped);
    currentRowNode = rowNodes[rowNodeStackIndex];
  }

  protected void markBeforeFirst() {
    beforeFirst = true;
    afterLast = false;
    currentRowNode = null;
  }

  protected void markAfterLast() {
    beforeFirst = false;
    afterLast = true;
    currentRowNode = null;
  }


  /***************** helper methods **************************/

  protected void appendCurrentTokenToRowBuffer() {
    block.get(currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength,
        currentRowNode.getTokenLength());
    rowLength += currentRowNode.getTokenLength();
  }

  protected void appendToRowBuffer(byte b) {
    rowBuffer[rowLength] = b;
    ++rowLength;
  }

  protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) {
    rowLength -= rowNodeBeingPopped.getTokenLength();
    --rowLength; // pop the parent's fan byte
  }

  protected boolean hasOccurrences() {
    return currentRowNode != null && currentRowNode.hasOccurrences();
  }

  protected boolean isBranch() {
    return currentRowNode != null && !currentRowNode.hasOccurrences()
        && currentRowNode.hasChildren();
  }

  protected boolean isNub() {
    return currentRowNode != null && currentRowNode.hasOccurrences()
        && currentRowNode.hasChildren();
  }

  protected boolean isLeaf() {
    return currentRowNode != null && currentRowNode.hasOccurrences()
        && !currentRowNode.hasChildren();
  }

  //TODO expose this in a PrefixTreeScanner interface
  public boolean isBeforeFirst() {
    return beforeFirst;
  }

  public boolean isAfterLast() {
    return afterLast;
  }

  protected boolean isOutOfBounds() {
    return beforeFirst || afterLast;
  }

  protected boolean isFirstCellInRow() {
    return currentCellIndex == 0;
  }

  protected boolean isLastCellInRow() {
    return currentCellIndex == currentRowNode.getLastCellIndex();
  }


  /********************* fill in family/qualifier/ts/type/value ************/

  protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) {
    populateNonRowFields(cellNum);
    return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, this, key);
  }

  protected void populateFirstNonRowFields() {
    populateNonRowFields(0);
  }

  protected void populatePreviousNonRowFields() {
    populateNonRowFields(currentCellIndex - 1);
  }

  protected void populateLastNonRowFields() {
    populateNonRowFields(currentRowNode.getLastCellIndex());
  }

  protected void populateNonRowFields(int cellIndex) {
    currentCellIndex = cellIndex;
    populateFamily();
    populateQualifier();
    // Read tags only if there are tags in the meta
    if (blockMeta.getNumTagsBytes() != 0) {
      populateTag();
    }
    populateTimestamp();
    populateMvccVersion();
    populateType();
    populateValueOffsets();
  }

  protected void populateFamily() {
    int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta);
    familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset();
    familyLength = familyReader.getColumnLength();
  }

  protected void populateQualifier() {
    int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta);
    qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset();
    qualifierLength = qualifierReader.getColumnLength();
  }

  protected void populateTag() {
    int tagTreeIndex = currentRowNode.getTagOffset(currentCellIndex, blockMeta);
    tagsOffset = tagsReader.populateBuffer(tagTreeIndex).getColumnOffset();
    tagsLength = tagsReader.getColumnLength();
  }

  protected void populateTimestamp() {
    if (blockMeta.isAllSameTimestamp()) {
      timestamp = blockMeta.getMinTimestamp();
    } else {
      int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta);
      timestamp = timestampDecoder.getLong(timestampIndex);
    }
  }

  protected void populateMvccVersion() {
    if (blockMeta.isAllSameMvccVersion()) {
      mvccVersion = blockMeta.getMinMvccVersion();
    } else {
      int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex, blockMeta);
      mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex);
    }
  }

  protected void populateType() {
    int typeInt;
    if (blockMeta.isAllSameType()) {
      typeInt = blockMeta.getAllTypes();
    } else {
      typeInt = currentRowNode.getType(currentCellIndex, blockMeta);
    }
    type = PrefixTreeCell.TYPES[typeInt];
  }

  protected void populateValueOffsets() {
    int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta);
    absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection;
    valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta);
    this.block.asSubByteBuffer(this.absoluteValueOffset, valueLength, pair);
  }

  /**************** getters ***************************/

  public PrefixTreeBlockMeta getBlockMeta() {
    return blockMeta;
  }

  public int getMaxRowTreeStackNodes() {
    return rowNodes.length;
  }

  public int getRowBufferLength() {
    return rowBuffer.length;
  }

  public int getQualifierBufferLength() {
    return qualifierBuffer.length;
  }

  public int getTagBufferLength() {
    return tagsBuffer.length;
  }
}
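
Because the scanner extends PrefixTreeCell, current() returns the scanner itself as a flyweight Cell. A sketch of the forward-iteration contract (hypothetical helper, not from the removed sources), with the caveat that a cell must be copied if it has to outlive the next advance():

package org.apache.hadoop.hbase.codec.prefixtree.decode;

public class ForwardScanSketch {
  // positionAtFirstCell() lands on the first cell (false for an empty
  // block); each advance() repositions the same flyweight, so cells are
  // counted rather than collected here.
  static int countCells(PrefixTreeArrayScanner scanner) {
    int count = 0;
    if (scanner.positionAtFirstCell()) {
      do {
        count++;
      } while (scanner.advance());
    }
    return count;
  }
}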
@@ -1,418 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;

import org.apache.hadoop.hbase.shaded.com.google.common.primitives.UnsignedBytes;

/**
 * <p>
 * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
 * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
 * set up to be a Trie of rows, so finding a particular row is extremely cheap.
 * </p>
 * Once it finds the row, it does a binary search through the cells inside the row, which is not as
 * fast as the trie search, but faster than iterating through every cell like existing block
 * formats do. For this reason, this implementation is targeted towards schemas where rows are
 * narrow enough to have several or many per block, and where you are generally looking for the
 * entire row or the first cell. It will still be fast for wide rows or point queries, but could be
 * improved upon.
 */
@InterfaceAudience.Private
public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
    CellSearcher {

  /*************** construct ******************************/

  public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
      int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
    super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
  }


  /********************* CellSearcher methods *******************/

  @Override
  public boolean positionAt(Cell key) {
    return CellScannerPosition.AT == positionAtOrAfter(key);
  }

  @Override
  public CellScannerPosition positionAtOrBefore(Cell key) {
    reInitFirstNode();
    int fanIndex = -1;

    while (true) {
      //detect row mismatch. break loop if mismatch
      int currentNodeDepth = rowLength;
      int rowTokenComparison = compareToCurrentToken(key);
      if (rowTokenComparison != 0) {
        return fixRowTokenMissReverse(rowTokenComparison);
      }

      //exact row found, move on to qualifier & ts
      if (rowMatchesAfterCurrentPosition(key)) {
        return positionAtQualifierTimestamp(key, true);
      }

      //detect dead end (no fan to descend into)
      if (!currentRowNode.hasFan()) {
        if (hasOccurrences()) { //must be leaf or nub
          populateLastNonRowFields();
          return CellScannerPosition.BEFORE;
        } else {
          //TODO i don't think this case is exercised by any tests
          return fixRowFanMissReverse(0);
        }
      }

      //keep hunting for the rest of the row
      byte searchForByte = PrivateCellUtil.getRowByte(key, currentNodeDepth);
      fanIndex = currentRowNode.whichFanNode(searchForByte);
      if (fanIndex < 0) { //no matching row. return early
        int insertionPoint = -fanIndex - 1;
        return fixRowFanMissReverse(insertionPoint);
      }
      //found a match, so dig deeper into the tree
      followFan(fanIndex);
    }
  }

  /**
   * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra
   * if-statements. Priority on readability and debuggability.
   */
  @Override
  public CellScannerPosition positionAtOrAfter(Cell key) {
    reInitFirstNode();
    int fanIndex = -1;

    while (true) {
      //detect row mismatch. break loop if mismatch
      int currentNodeDepth = rowLength;
      int rowTokenComparison = compareToCurrentToken(key);
      if (rowTokenComparison != 0) {
        return fixRowTokenMissForward(rowTokenComparison);
      }

      //exact row found, move on to qualifier & ts
      if (rowMatchesAfterCurrentPosition(key)) {
        return positionAtQualifierTimestamp(key, false);
      }

      //detect dead end (no fan to descend into)
      if (!currentRowNode.hasFan()) {
        if (hasOccurrences()) {
          if (rowLength < key.getRowLength()) {
            nextRow();
          } else {
            populateFirstNonRowFields();
          }
          return CellScannerPosition.AFTER;
        } else {
          //TODO i don't think this case is exercised by any tests
          return fixRowFanMissForward(0);
        }
      }

      //keep hunting for the rest of the row
      byte searchForByte = PrivateCellUtil.getRowByte(key, currentNodeDepth);
      fanIndex = currentRowNode.whichFanNode(searchForByte);
      if (fanIndex < 0) { //no matching row. return early
        int insertionPoint = -fanIndex - 1;
        return fixRowFanMissForward(insertionPoint);
      }
      //found a match, so dig deeper into the tree
      followFan(fanIndex);
    }
  }

  @Override
  public boolean seekForwardTo(Cell key) {
    if (currentPositionIsAfter(key)) {
      //our position is after the requested key, so can't do anything
      return false;
    }
    return positionAt(key);
  }

  @Override
  public CellScannerPosition seekForwardToOrBefore(Cell key) {
    //Do we even need this check or should upper layers avoid this situation. It's relatively
    //expensive compared to the rest of the seek operation.
    if (currentPositionIsAfter(key)) {
      //our position is after the requested key, so can't do anything
      return CellScannerPosition.AFTER;
    }

    return positionAtOrBefore(key);
  }

  @Override
  public CellScannerPosition seekForwardToOrAfter(Cell key) {
    //Do we even need this check or should upper layers avoid this situation. It's relatively
    //expensive compared to the rest of the seek operation.
    if (currentPositionIsAfter(key)) {
      //our position is after the requested key, so can't do anything
      return CellScannerPosition.AFTER;
    }

    return positionAtOrAfter(key);
  }

  /**
   * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
   */
  @Override
  public void positionAfterLastCell() {
    resetToBeforeFirstEntry();
    beforeFirst = false;
    afterLast = true;
  }


  /***************** Object methods ***************************/

  @Override
  public boolean equals(Object obj) {
    //trivial override to confirm intent (findbugs)
    return super.equals(obj);
  }


  /****************** internal methods ************************/

  protected boolean currentPositionIsAfter(Cell cell) {
    return compareTo(cell) > 0;
  }

  protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
    int minIndex = 0;
    int maxIndex = currentRowNode.getLastCellIndex();
    int diff;
    while (true) {
      int midIndex = (maxIndex + minIndex) / 2; //don't worry about overflow
      diff = populateNonRowFieldsAndCompareTo(midIndex, key);

      if (diff == 0) { // found exact match
        return CellScannerPosition.AT;
      } else if (minIndex == maxIndex) { // even termination case
        break;
      } else if ((minIndex + 1) == maxIndex) { // odd termination case
        diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
        if (diff > 0) {
          diff = populateNonRowFieldsAndCompareTo(minIndex, key);
        }
        break;
      } else if (diff < 0) { // keep going forward
        minIndex = currentCellIndex;
      } else { // went past it, back up
        maxIndex = currentCellIndex;
      }
    }

    if (diff == 0) {
      return CellScannerPosition.AT;

    } else if (diff < 0) { // we are before key
      if (beforeOnMiss) {
        return CellScannerPosition.BEFORE;
      }
      if (advance()) {
        return CellScannerPosition.AFTER;
      }
      return CellScannerPosition.AFTER_LAST;

    } else { // we are after key
      if (!beforeOnMiss) {
        return CellScannerPosition.AFTER;
      }
      if (previous()) {
        return CellScannerPosition.BEFORE;
      }
      return CellScannerPosition.BEFORE_FIRST;
    }
  }

  /**
   * compare this.row to key.row but starting at the current rowLength
   * @param key Cell being searched for
   * @return true if row buffer contents match key.row
   */
  protected boolean rowMatchesAfterCurrentPosition(Cell key) {
    if (!currentRowNode.hasOccurrences()) {
      return false;
    }
    int thatRowLength = key.getRowLength();
    if (rowLength != thatRowLength) {
      return false;
    }
    return true;
  }

  // TODO move part of this to Cell comparator?
  /**
   * Compare only the bytes within the window of the current token
   * @param key
   * @return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after
   */
  protected int compareToCurrentToken(Cell key) {
    int startIndex = rowLength - currentRowNode.getTokenLength();
    int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
    for (int i = startIndex; i < endIndexExclusive; ++i) {
      if (i >= key.getRowLength()) { // key was shorter, so it's first
        return -1;
      }
      byte keyByte = PrivateCellUtil.getRowByte(key, i);
      byte thisByte = rowBuffer[i];
      if (keyByte == thisByte) {
        continue;
      }
      return UnsignedBytes.compare(keyByte, thisByte);
    }
    if (!currentRowNode.hasOccurrences() && rowLength >= key.getRowLength()) { // key was shorter
      return -1;
    }
    return 0;
  }

  protected void followLastFansUntilExhausted() {
    while (currentRowNode.hasFan()) {
      followLastFan();
    }
  }


  /****************** complete seek when token mismatch ******************/

  /**
   * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br>
   *          >0: input key is after the searcher's position
   */
  protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
    if (searcherIsAfterInputKey < 0) { //searcher position is after the input key, so back up
      boolean foundPreviousRow = previousRow(true);
      if (foundPreviousRow) {
        populateLastNonRowFields();
        return CellScannerPosition.BEFORE;
      } else {
        return CellScannerPosition.BEFORE_FIRST;
      }

    } else { //searcher position is before the input key
      if (currentRowNode.hasOccurrences()) {
        populateFirstNonRowFields();
        return CellScannerPosition.BEFORE;
      }
      boolean foundNextRow = nextRow();
      if (foundNextRow) {
        return CellScannerPosition.AFTER;
      } else {
        return CellScannerPosition.AFTER_LAST;
      }
    }
  }

  /**
   * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br>
   *          >0: input key is after the searcher's position
   */
  protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
    if (searcherIsAfterInputKey < 0) { //searcher position is after the input key
      if (currentRowNode.hasOccurrences()) {
        populateFirstNonRowFields();
        return CellScannerPosition.AFTER;
      }
      boolean foundNextRow = nextRow();
      if (foundNextRow) {
        return CellScannerPosition.AFTER;
      } else {
        return CellScannerPosition.AFTER_LAST;
      }

    } else { //searcher position is before the input key, so go forward
      discardCurrentRowNode(true);
      boolean foundNextRow = nextRow();
      if (foundNextRow) {
        return CellScannerPosition.AFTER;
      } else {
        return CellScannerPosition.AFTER_LAST;
      }
    }
  }


  /****************** complete seek when fan mismatch ******************/

  protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint) {
    if (fanInsertionPoint == 0) { //we need to back up a row
      if (currentRowNode.hasOccurrences()) {
        populateLastNonRowFields();
        return CellScannerPosition.BEFORE;
      }
      boolean foundPreviousRow = previousRow(true); //true -> position on last cell in row
      if (foundPreviousRow) {
        populateLastNonRowFields();
        return CellScannerPosition.BEFORE;
      }
      return CellScannerPosition.BEFORE_FIRST;
    }

    //follow the previous fan, but then descend recursively forward
    followFan(fanInsertionPoint - 1);
    followLastFansUntilExhausted();
    populateLastNonRowFields();
    return CellScannerPosition.BEFORE;
  }

  protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint) {
    if (fanInsertionPoint >= currentRowNode.getFanOut()) {
      discardCurrentRowNode(true);
      if (!nextRow()) {
        return CellScannerPosition.AFTER_LAST;
      } else {
        return CellScannerPosition.AFTER;
      }
    }

    followFan(fanInsertionPoint);
    if (hasOccurrences()) {
      populateFirstNonRowFields();
      return CellScannerPosition.AFTER;
    }

    if (nextRowInternal()) {
      populateFirstNonRowFields();
      return CellScannerPosition.AFTER;

    } else {
      return CellScannerPosition.AFTER_LAST;
    }
  }

}
|
|
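The four fix-miss handlers above all reduce to the insertion-point convention of a binary search: a forward seek resolves a miss to the position after the input key, a reverse seek to the position before it. For intuition, the same mapping on a flat sorted array (a standalone sketch; the names are illustrative, not part of this module):

import java.util.Arrays;

enum Position { BEFORE_FIRST, BEFORE, AFTER, AFTER_LAST, EXACT }

final class SeekMissSketch {
  // On a miss, Arrays.binarySearch returns -(insertionPoint) - 1. A forward
  // seek lands AFTER the key, at insertionPoint; past the end means AFTER_LAST.
  static Position seekForward(long[] sorted, long key) {
    int i = Arrays.binarySearch(sorted, key);
    if (i >= 0) {
      return Position.EXACT;
    }
    int insertionPoint = -i - 1;
    return insertionPoint >= sorted.length ? Position.AFTER_LAST : Position.AFTER;
  }

  // A reverse seek lands BEFORE the key, at insertionPoint - 1; an insertion
  // point of 0 means there is nothing earlier, i.e. BEFORE_FIRST.
  static Position seekReverse(long[] sorted, long key) {
    int i = Arrays.binarySearch(sorted, key);
    if (i >= 0) {
      return Position.EXACT;
    }
    int insertionPoint = -i - 1;
    return insertionPoint == 0 ? Position.BEFORE_FIRST : Position.BEFORE;
  }
}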
@@ -1,311 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode;

import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.ByteBufferCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.SettableSequenceId;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ObjectIntPair;

/**
 * As the PrefixTreeArrayScanner moves through the tree bytes, it changes the
 * values in the fields of this class so that Cell logic can be applied, but
 * without allocating new memory for every Cell iterated through.
 */
@InterfaceAudience.Private
public class PrefixTreeCell extends ByteBufferCell implements SettableSequenceId,
    Comparable<Cell> {
  // Create a reference here? Can be removed too
  protected CellComparator comparator = CellComparatorImpl.COMPARATOR;

  /********************** static **********************/

  public static final KeyValue.Type[] TYPES = new KeyValue.Type[256];
  static {
    for (KeyValue.Type type : KeyValue.Type.values()) {
      TYPES[type.getCode() & 0xff] = type;
    }
  }

  // Same as KeyValue constructor. Only used to avoid NPEs when the full cell
  // hasn't been initialized.
  public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put;

  /******************** fields ************************/

  protected ByteBuff block;
  // we could also avoid setting the mvccVersion in the scanner/searcher, but
  // this is simpler
  protected boolean includeMvccVersion;

  protected byte[] rowBuffer;
  protected int rowLength;

  protected byte[] familyBuffer;
  protected int familyOffset;
  protected int familyLength;

  protected byte[] qualifierBuffer;// aligned to the end of the array
  protected int qualifierOffset;
  protected int qualifierLength;

  protected Long timestamp;
  protected Long mvccVersion;

  protected KeyValue.Type type;

  protected int absoluteValueOffset;
  protected int valueLength;

  protected byte[] tagsBuffer;
  protected int tagsOffset;
  protected int tagsLength;
  // Pair to set the value ByteBuffer and its offset
  protected ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

  /********************** Cell methods ******************/

  /**
   * For debugging. Currently creates a new KeyValue to utilize its toString()
   * method.
   */
  @Override
  public String toString() {
    return getKeyValueString();
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof Cell)) {
      return false;
    }
    // Temporary hack to maintain backwards compatibility with KeyValue.equals
    return PrivateCellUtil.equalsIgnoreMvccVersion(this, (Cell) obj);

    // TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907
  }

  @Override
  public int hashCode() {
    return calculateHashForKey(this);
  }

  private int calculateHashForKey(Cell cell) {
    // pre-calculate the hashes of the three key byte ranges
    int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
    int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(),
        cell.getFamilyLength());
    int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(),
        cell.getQualifierLength());

    // combine the sub-hashes with the timestamp and type
    int hash = 31 * rowHash + familyHash;
    hash = 31 * hash + qualifierHash;
    hash = 31 * hash + (int) cell.getTimestamp();
    hash = 31 * hash + cell.getTypeByte();
    return hash;
  }

  @Override
  public int compareTo(Cell other) {
    return comparator.compare(this, other);
  }

  @Override
  public long getTimestamp() {
    return timestamp;
  }

  @Override
  public long getSequenceId() {
    if (!includeMvccVersion) {
      return 0L;
    }
    return mvccVersion;
  }

  @Override
  public int getValueLength() {
    return valueLength;
  }

  @Override
  public byte[] getRowArray() {
    return rowBuffer;
  }

  @Override
  public int getRowOffset() {
    return 0;
  }

  @Override
  public short getRowLength() {
    return (short) rowLength;
  }

  @Override
  public byte[] getFamilyArray() {
    return familyBuffer;
  }

  @Override
  public int getFamilyOffset() {
    return familyOffset;
  }

  @Override
  public byte getFamilyLength() {
    return (byte) familyLength;
  }

  @Override
  public byte[] getQualifierArray() {
    return qualifierBuffer;
  }

  @Override
  public int getQualifierOffset() {
    return qualifierOffset;
  }

  @Override
  public int getQualifierLength() {
    return qualifierLength;
  }

  @Override
  public byte[] getValueArray() {
    if (this.pair.getFirst().hasArray()) {
      return this.pair.getFirst().array();
    } else {
      // Just in case getValueArray is called on an offheap BB
      byte[] val = new byte[valueLength];
      ByteBufferUtils.copyFromBufferToArray(val, this.pair.getFirst(), this.pair.getSecond(), 0,
          valueLength);
      return val;
    }
  }

  @Override
  public int getValueOffset() {
    if (this.pair.getFirst().hasArray()) {
      return this.pair.getSecond() + this.pair.getFirst().arrayOffset();
    } else {
      return 0;
    }
  }

  @Override
  public byte getTypeByte() {
    return type.getCode();
  }

  /************************* helper methods *************************/

  /**
   * Need this separate method so we can call it from subclasses' toString()
   * methods.
   */
  protected String getKeyValueString() {
    KeyValue kv = KeyValueUtil.copyToNewKeyValue(this);
    return kv.toString();
  }

  @Override
  public int getTagsOffset() {
    return tagsOffset;
  }

  @Override
  public int getTagsLength() {
    return tagsLength;
  }

  @Override
  public byte[] getTagsArray() {
    return this.tagsBuffer;
  }

  @Override
  public void setSequenceId(long seqId) {
    mvccVersion = seqId;
  }

  @Override
  public ByteBuffer getRowByteBuffer() {
    return ByteBuffer.wrap(rowBuffer);
  }

  @Override
  public int getRowPosition() {
    return 0;
  }

  @Override
  public ByteBuffer getFamilyByteBuffer() {
    return ByteBuffer.wrap(familyBuffer);
  }

  @Override
  public int getFamilyPosition() {
    return getFamilyOffset();
  }

  @Override
  public ByteBuffer getQualifierByteBuffer() {
    return ByteBuffer.wrap(qualifierBuffer);
  }

  @Override
  public int getQualifierPosition() {
    return getQualifierOffset();
  }

  @Override
  public ByteBuffer getValueByteBuffer() {
    return pair.getFirst();
  }

  @Override
  public int getValuePosition() {
    return pair.getSecond();
  }

  @Override
  public ByteBuffer getTagsByteBuffer() {
    return ByteBuffer.wrap(tagsBuffer);
  }

  @Override
  public int getTagsPosition() {
    return getTagsOffset();
  }
}
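PrefixTreeCell is a flyweight: the scanner mutates its fields in place, so a scan over N cells reuses one object instead of allocating N. A minimal sketch of the same pattern outside HBase (all names here are hypothetical, not part of this commit):

// Flyweight sketch: one mutable view object is reused for every record.
final class RecordView {
  byte[] buf;
  int offset;
  int length;

  void reset(byte[] buf, int offset, int length) {
    this.buf = buf;
    this.offset = offset;
    this.length = length;
  }
}

final class RecordScanner {
  private final RecordView view = new RecordView(); // reused across next() calls

  // Callers must copy the view's contents if they need them past the next call.
  RecordView next(byte[] block, int recordOffset, int recordLength) {
    view.reset(block, recordOffset, recordLength);
    return view;
  }
}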
@@ -1,109 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode.column;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;

@InterfaceAudience.Private
public class ColumnNodeReader {

  /**************** fields ************************/

  protected PrefixTreeBlockMeta blockMeta;
  protected ByteBuff block;
  protected ColumnNodeType nodeType;
  protected byte[] columnBuffer;

  protected int offsetIntoBlock;

  protected int tokenOffsetIntoBlock;
  protected int tokenLength;
  protected int parentStartPosition;


  /************** construct *************************/

  public ColumnNodeReader(byte[] columnBuffer, ColumnNodeType nodeType) {
    this.columnBuffer = columnBuffer;
    this.nodeType = nodeType;
  }

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
    this.blockMeta = blockMeta;
    this.block = block;
  }


  /************* methods *****************************/

  public void positionAt(int offsetIntoBlock) {
    this.offsetIntoBlock = offsetIntoBlock;
    tokenLength = UVIntTool.getInt(block, offsetIntoBlock);
    tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
    int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength;
    int offsetWidth;
    if (nodeType == ColumnNodeType.FAMILY) {
      offsetWidth = blockMeta.getFamilyOffsetWidth();
    } else if (nodeType == ColumnNodeType.QUALIFIER) {
      offsetWidth = blockMeta.getQualifierOffsetWidth();
    } else {
      offsetWidth = blockMeta.getTagsOffsetWidth();
    }
    parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth);
  }

  public void prependTokenToBuffer(int bufferStartIndex) {
    block.get(tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength);
  }

  public boolean isRoot() {
    if (nodeType == ColumnNodeType.FAMILY) {
      return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset();
    } else if (nodeType == ColumnNodeType.QUALIFIER) {
      return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset();
    } else {
      return offsetIntoBlock == blockMeta.getAbsoluteTagsOffset();
    }
  }


  /************** standard methods *********************/

  @Override
  public String toString() {
    return super.toString() + "[" + offsetIntoBlock + "]";
  }


  /****************** get/set ****************************/

  public int getTokenLength() {
    return tokenLength;
  }

  public int getParentStartPosition() {
    return parentStartPosition;
  }

}
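positionAt above reads the token length as a variable-width int (UVIntTool) followed by the parent pointer as a fixed-width int (UFIntTool). A sketch of the variable-width read, assuming a base-128 layout with a continuation bit, which is one common choice; the exact UVIntTool wire format is defined in that class and may differ:

// Assumed varint layout for illustration only: 7 value bits per byte,
// high bit set on every byte except the last.
final class VarIntSketch {
  static int getInt(byte[] block, int offset) {
    int value = 0;
    int shift = 0;
    while (true) {
      int b = block[offset++] & 0xff;
      value |= (b & 0x7f) << shift;
      if ((b & 0x80) == 0) { // no continuation bit: last byte
        return value;
      }
      shift += 7;
    }
  }

  // number of bytes getInt would consume for this value
  static int numBytes(int value) {
    int n = 1;
    while ((value >>>= 7) != 0) {
      n++;
    }
    return n;
  }
}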
@@ -1,108 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode.column;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.nio.ByteBuff;

/**
 * Position one of these appropriately in the data block and you can call its methods to retrieve
 * the family or qualifier at the current position.
 */
@InterfaceAudience.Private
public class ColumnReader {

  /****************** fields *************************/

  protected PrefixTreeBlockMeta blockMeta;

  protected byte[] columnBuffer;
  protected int columnOffset;
  protected int columnLength;
  protected ColumnNodeType nodeType;

  protected ColumnNodeReader columnNodeReader;


  /******************** construct *******************/

  public ColumnReader(byte[] columnBuffer, ColumnNodeType nodeType) {
    this.columnBuffer = columnBuffer;
    this.nodeType = nodeType;
    this.columnNodeReader = new ColumnNodeReader(columnBuffer, nodeType);
  }

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
    this.blockMeta = blockMeta;
    clearColumnBuffer();
    columnNodeReader.initOnBlock(blockMeta, block);
  }


  /********************* methods *******************/

  public ColumnReader populateBuffer(int offsetIntoColumnData) {
    clearColumnBuffer();
    int nextRelativeOffset = offsetIntoColumnData;
    while (true) {
      int absoluteOffset = 0;
      if (nodeType == ColumnNodeType.FAMILY) {
        absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset;
      } else if (nodeType == ColumnNodeType.QUALIFIER) {
        absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset;
      } else {
        absoluteOffset = blockMeta.getAbsoluteTagsOffset() + nextRelativeOffset;
      }
      columnNodeReader.positionAt(absoluteOffset);
      columnOffset -= columnNodeReader.getTokenLength();
      columnLength += columnNodeReader.getTokenLength();
      columnNodeReader.prependTokenToBuffer(columnOffset);
      if (columnNodeReader.isRoot()) {
        return this;
      }
      nextRelativeOffset = columnNodeReader.getParentStartPosition();
    }
  }

  public byte[] copyBufferToNewArray() { // for testing
    byte[] out = new byte[columnLength];
    System.arraycopy(columnBuffer, columnOffset, out, 0, out.length);
    return out;
  }

  public int getColumnLength() {
    return columnLength;
  }

  public void clearColumnBuffer() {
    columnOffset = columnBuffer.length;
    columnLength = 0;
  }


  /****************************** get/set *************************************/

  public int getColumnOffset() {
    return columnOffset;
  }

}
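populateBuffer rebuilds a column by walking child-to-parent links and prepending each node's token into the tail of columnBuffer until it reaches the root. A standalone sketch of that back-to-front reconstruction over a toy node array (hypothetical names, not the HBase API):

// Sketch: each node holds a token fragment and its parent's index; the full
// byte[] is rebuilt leaf-to-root by filling a buffer from the end.
final class TrieNode {
  final byte[] token;
  final int parent; // -1 marks the root

  TrieNode(byte[] token, int parent) {
    this.token = token;
    this.parent = parent;
  }
}

final class PrefixRebuilder {
  // buffer plays the role of ColumnReader's columnBuffer: filled from the end.
  static byte[] rebuild(TrieNode[] nodes, int leafIndex, byte[] buffer) {
    int offset = buffer.length;
    int length = 0;
    for (int i = leafIndex; i != -1; i = nodes[i].parent) {
      byte[] token = nodes[i].token;
      offset -= token.length;           // prepend: move the start backwards
      length += token.length;
      System.arraycopy(token, 0, buffer, offset, token.length);
    }
    byte[] out = new byte[length];      // copy out the reconstructed column
    System.arraycopy(buffer, offset, out, 0, length);
    return out;
  }
}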
@@ -1,281 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode.row;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;

/**
 * Position one of these appropriately in the data block and you can call its methods to retrieve
 * information necessary to decode the cells in the row.
 */
@InterfaceAudience.Private
public class RowNodeReader {

  /************* fields ***********************************/

  protected ByteBuff block;
  protected int offset;
  protected int fanIndex;

  protected int numCells;

  protected int tokenOffset;
  protected int tokenLength;
  protected int fanOffset;
  protected int fanOut;

  protected int familyOffsetsOffset;
  protected int qualifierOffsetsOffset;
  protected int timestampIndexesOffset;
  protected int mvccVersionIndexesOffset;
  protected int operationTypesOffset;
  protected int valueOffsetsOffset;
  protected int valueLengthsOffset;
  protected int tagOffsetsOffset;
  protected int nextNodeOffsetsOffset;


  /******************* construct **************************/

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block, int offset) {
    this.block = block;

    this.offset = offset;
    resetFanIndex();

    this.tokenLength = UVIntTool.getInt(block, offset);
    this.tokenOffset = offset + UVIntTool.numBytes(tokenLength);

    this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength);
    this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut);

    this.numCells = UVIntTool.getInt(block, fanOffset + fanOut);

    this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells);
    this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth();
    this.tagOffsetsOffset = this.qualifierOffsetsOffset
        + numCells * blockMeta.getQualifierOffsetWidth();
    // TODO: this code may not be needed now, as we always consider tags to be present
    if (blockMeta.getTagsOffsetWidth() == 0) {
      // make both of them the same so that we know there are no tags
      this.tagOffsetsOffset = this.qualifierOffsetsOffset;
      this.timestampIndexesOffset = qualifierOffsetsOffset
          + numCells * blockMeta.getQualifierOffsetWidth();
    } else {
      this.timestampIndexesOffset = tagOffsetsOffset + numCells * blockMeta.getTagsOffsetWidth();
    }
    this.mvccVersionIndexesOffset = timestampIndexesOffset
        + numCells * blockMeta.getTimestampIndexWidth();
    this.operationTypesOffset = mvccVersionIndexesOffset
        + numCells * blockMeta.getMvccVersionIndexWidth();
    this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth();
    this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth();
    this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth();
  }


  /******************** methods ****************************/

  public boolean isLeaf() {
    return fanOut == 0;
  }

  public boolean isNub() {
    return fanOut > 0 && numCells > 0;
  }

  public boolean isBranch() {
    return fanOut > 0 && numCells == 0;
  }

  public boolean hasOccurrences() {
    return numCells > 0;
  }

  public int getTokenArrayOffset() {
    return tokenOffset;
  }

  public int getTokenLength() {
    return tokenLength;
  }

  public byte getFanByte(int i) {
    return block.get(fanOffset + i);
  }

  /**
   * for debugging
   */
  protected String getFanByteReadable(int i) {
    return ByteBuff.toStringBinary(block, fanOffset + i, 1);
  }

  public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getFamilyOffsetWidth();
    int startIndex = familyOffsetsOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getQualifierOffsetWidth();
    int startIndex = qualifierOffsetsOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public int getTagOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getTagsOffsetWidth();
    int startIndex = tagOffsetsOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getTimestampIndexWidth();
    int startIndex = timestampIndexesOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getMvccVersionIndexWidth();
    int startIndex = mvccVersionIndexesOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public int getType(int index, PrefixTreeBlockMeta blockMeta) {
    if (blockMeta.isAllSameType()) {
      return blockMeta.getAllTypes();
    }
    return block.get(operationTypesOffset + index);
  }

  public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getValueOffsetWidth();
    int startIndex = valueOffsetsOffset + fIntWidth * index;
    int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
    return offset;
  }

  public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getValueLengthWidth();
    int startIndex = valueLengthsOffset + fIntWidth * index;
    int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
    return length;
  }

  public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) {
    int fIntWidth = blockMeta.getNextNodeOffsetWidth();
    int startIndex = nextNodeOffsetsOffset + fIntWidth * index;
    return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
  }

  public String getBranchNubLeafIndicator() {
    if (isNub()) {
      return "N";
    }
    return isBranch() ? "B" : "L";
  }

  public boolean hasChildren() {
    return fanOut > 0;
  }

  public int getLastFanIndex() {
    return fanOut - 1;
  }

  public int getLastCellIndex() {
    return numCells - 1;
  }

  public int getNumCells() {
    return numCells;
  }

  public int getFanOut() {
    return fanOut;
  }

  public byte[] getToken() {
    byte[] newToken = new byte[tokenLength];
    block.get(tokenOffset, newToken, 0, tokenLength);
    return newToken;
  }

  public int getOffset() {
    return offset;
  }

  public int whichFanNode(byte searchForByte) {
    if (!hasFan()) {
      throw new IllegalStateException("This row node has no fan, so can't search it");
    }
    int fanIndexInBlock = ByteBuff.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut,
        searchForByte);
    if (fanIndexInBlock >= 0) { // found it, but need to adjust for position of fan in overall block
      return fanIndexInBlock - fanOffset;
    }
    return fanIndexInBlock + fanOffset; // didn't find it, so compensate in reverse
  }

  public void resetFanIndex() {
    fanIndex = -1; // just the way the logic currently works
  }

  public int getFanIndex() {
    return fanIndex;
  }

  public void setFanIndex(int fanIndex) {
    this.fanIndex = fanIndex;
  }

  public boolean hasFan() {
    return fanOut > 0;
  }

  public boolean hasPreviousFanNodes() {
    return fanOut > 0 && fanIndex > 0;
  }

  public boolean hasMoreFanNodes() {
    return fanIndex < getLastFanIndex();
  }

  public boolean isOnLastFanNode() {
    return !hasMoreFanNodes();
  }


  /*************** standard methods **************************/

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("fan:" + ByteBuff.toStringBinary(block, fanOffset, fanOut));
    sb.append(",token:" + ByteBuff.toStringBinary(block, tokenOffset, tokenLength));
    sb.append(",numCells:" + numCells);
    sb.append(",fanIndex:" + fanIndex);
    if (fanIndex >= 0) {
      sb.append("(" + getFanByteReadable(fanIndex) + ")");
    }
    return sb.toString();
  }
}
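initOnBlock lays the per-cell attributes out as back-to-back fixed-width integer arrays, so each section start is the previous start plus numCells * width, and every getter is one multiply and one fixed-width read. A toy version of that arithmetic (hypothetical names; the widths are chosen per block by the encoder):

// Sketch: sections of fixed-width big-endian unsigned ints packed end to end.
// sectionStart[k+1] = sectionStart[k] + numEntries * width[k]; reading entry i
// of a section is width[k] bytes at sectionStart[k] + i * width[k].
final class PackedSections {
  static long readUFInt(byte[] block, int offset, int width) {
    long value = 0;
    for (int i = 0; i < width; i++) {
      value = (value << 8) | (block[offset + i] & 0xff);
    }
    return value;
  }

  static long entry(byte[] block, int sectionStart, int width, int index) {
    return readUFInt(block, sectionStart + width * index, width);
  }
}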
@@ -1,58 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;

/**
 * Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block.
 */
@InterfaceAudience.Private
public class MvccVersionDecoder {

  protected PrefixTreeBlockMeta blockMeta;
  protected ByteBuff block;


  /************** construct ***********************/

  public MvccVersionDecoder() {
  }

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
    this.block = block;
    this.blockMeta = blockMeta;
  }


  /************** methods *************************/

  public long getMvccVersion(int index) {
    if (blockMeta.getMvccVersionIndexWidth() == 0) { // all mvccVersions in the block were identical
      return blockMeta.getMinMvccVersion();
    }
    int startIndex = blockMeta.getAbsoluteMvccVersionOffset()
        + blockMeta.getMvccVersionDeltaWidth() * index;
    long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth());
    return blockMeta.getMinMvccVersion() + delta;
  }
}
@@ -1,58 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.vint.UFIntTool;

/**
 * Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block.
 */
@InterfaceAudience.Private
public class TimestampDecoder {

  protected PrefixTreeBlockMeta blockMeta;
  protected ByteBuff block;


  /************** construct ***********************/

  public TimestampDecoder() {
  }

  public void initOnBlock(PrefixTreeBlockMeta blockMeta, ByteBuff block) {
    this.block = block;
    this.blockMeta = blockMeta;
  }


  /************** methods *************************/

  public long getLong(int index) {
    if (blockMeta.getTimestampIndexWidth() == 0) { // all timestamps in the block were identical
      return blockMeta.getMinTimestamp();
    }
    int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth()
        * index;
    long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth());
    return blockMeta.getMinTimestamp() + delta;
  }
}
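Both decoders above store only fixed-width deltas against a per-block minimum, so a column of near-identical longs costs a few bytes per value, or zero when the width is 0 and every value equals the minimum. A self-contained sketch of the same decode path (hypothetical names, mirroring the min + delta logic shown above):

// Sketch: value i = blockMin + delta[i], each delta a fixed-width unsigned
// int; width == 0 means every value in the block equals blockMin.
final class DeltaLongDecoder {
  private final long blockMin;
  private final byte[] deltas;
  private final int deltaWidth;

  DeltaLongDecoder(long blockMin, byte[] deltas, int deltaWidth) {
    this.blockMin = blockMin;
    this.deltas = deltas;
    this.deltaWidth = deltaWidth;
  }

  long get(int index) {
    if (deltaWidth == 0) { // all values in the block were identical
      return blockMin;
    }
    long delta = 0;
    int start = deltaWidth * index;
    for (int i = 0; i < deltaWidth; i++) {
      delta = (delta << 8) | (deltas[start + i] & 0xff);
    }
    return blockMin + delta;
  }
}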
@@ -1,56 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode;

import java.io.OutputStream;

import org.apache.yetus.audience.InterfaceAudience;

/**
 * Retrieve PrefixTreeEncoders from this factory which handles pooling them and preparing the
 * ones retrieved from the pool for usage.
 */
@InterfaceAudience.Private
public class EncoderFactory {

  private static final EncoderPool POOL = new EncoderPoolImpl();


  public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
    return POOL.checkOut(outputStream, includeMvccVersion);
  }

  public static void checkIn(PrefixTreeEncoder encoder) {
    POOL.checkIn(encoder);
  }


  /**************************** helper ******************************/

  protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder,
      OutputStream outputStream, boolean includeMvccVersion) {
    PrefixTreeEncoder ret = encoder;
    if (encoder == null) {
      ret = new PrefixTreeEncoder(outputStream, includeMvccVersion);
    }
    ret.reset(outputStream, includeMvccVersion);
    return ret;
  }

}
@@ -1,32 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode;

import java.io.OutputStream;

import org.apache.yetus.audience.InterfaceAudience;


@InterfaceAudience.Private
public interface EncoderPool {

  PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion);

  void checkIn(PrefixTreeEncoder encoder);

}
@@ -1,46 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.codec.prefixtree.encode;

import java.io.OutputStream;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.yetus.audience.InterfaceAudience;

@InterfaceAudience.Private
public class EncoderPoolImpl implements EncoderPool {

  private BlockingQueue<PrefixTreeEncoder> unusedEncoders = new LinkedBlockingQueue<>();

  @Override
  public PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
    PrefixTreeEncoder encoder = unusedEncoders.poll();
    if (encoder == null) {
      encoder = new PrefixTreeEncoder(outputStream, includeMvccVersion);
    } else {
      encoder.reset(outputStream, includeMvccVersion);
    }
    return encoder;
  }

  @Override
  public void checkIn(PrefixTreeEncoder encoder) {
    this.unusedEncoders.add(encoder);
  }
}
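checkOut either recycles an idle encoder (after resetting it) or allocates a new one; checkIn returns it to the queue. A generic sketch of that poll-or-create pattern, under the assumption that the pooled type needs an explicit reset before reuse (SimplePool and its members are hypothetical, not HBase API):

import java.util.concurrent.LinkedBlockingQueue;
import java.util.function.Consumer;
import java.util.function.Supplier;

final class SimplePool<T> {
  private final LinkedBlockingQueue<T> free = new LinkedBlockingQueue<>();
  private final Supplier<T> factory;
  private final Consumer<T> reset;

  SimplePool(Supplier<T> factory, Consumer<T> reset) {
    this.factory = factory;
    this.reset = reset;
  }

  T checkOut() {
    T item = free.poll();      // reuse an idle instance if one is available
    if (item == null) {
      item = factory.get();    // otherwise allocate a fresh one
    } else {
      reset.accept(item);      // recycled instances must be reset before use
    }
    return item;
  }

  void checkIn(T item) {
    free.add(item);            // return to the pool; never blocks
  }
}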
@ -1,542 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.codec.prefixtree.encode;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.CellUtil;
|
||||
import org.apache.hadoop.hbase.PrivateCellUtil;
|
||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowSectionWriter;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
|
||||
import org.apache.hadoop.hbase.io.CellOutputStream;
|
||||
import org.apache.hadoop.hbase.util.ArrayUtils;
|
||||
import org.apache.hadoop.hbase.util.ByteRange;
|
||||
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
|
||||
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
|
||||
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet;
|
||||
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
|
||||
import org.apache.hadoop.hbase.util.vint.UFIntTool;
|
||||
import org.apache.hadoop.io.WritableUtils;
|
||||
/**
|
||||
* This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are
|
||||
* added they are completely copied into the various encoding structures. This is important because
|
||||
* usually the cells being fed in during compactions will be transient.<br>
|
||||
* <br>
|
||||
* Usage:<br>
|
||||
* 1) constructor<br>
|
||||
* 4) append cells in sorted order: write(Cell cell)<br>
|
||||
* 5) flush()<br>
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class PrefixTreeEncoder implements CellOutputStream {
|
||||
|
||||
/**************** static ************************/
|
||||
|
||||
protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class);
|
||||
|
||||
//future-proof where HBase supports multiple families in a data block.
|
||||
public static final boolean MULITPLE_FAMILIES_POSSIBLE = false;
|
||||
|
||||
private static final boolean USE_HASH_COLUMN_SORTER = true;
|
||||
private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256;
|
||||
private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024;
|
||||
|
||||
|
||||
/**************** fields *************************/
|
||||
|
||||
protected long numResets = 0L;
|
||||
|
||||
protected OutputStream outputStream;
|
||||
|
||||
/*
|
||||
* Cannot change during a single block's encoding. If false, then substitute incoming Cell's
|
||||
* mvccVersion with zero and write out the block as usual.
|
||||
*/
|
||||
protected boolean includeMvccVersion;
|
||||
|
||||
/*
|
||||
* reusable ByteRanges used for communicating with the sorters/compilers
|
||||
*/
|
||||
protected ByteRange rowRange;
|
||||
protected ByteRange familyRange;
|
||||
protected ByteRange qualifierRange;
|
||||
protected ByteRange tagsRange;
|
||||
|
||||
/*
|
||||
* incoming Cell fields are copied into these arrays
|
||||
*/
|
||||
protected long[] timestamps;
|
||||
protected long[] mvccVersions;
|
||||
protected byte[] typeBytes;
|
||||
protected int[] valueOffsets;
|
||||
protected int[] tagsOffsets;
|
||||
protected byte[] values;
|
||||
protected byte[] tags;
|
||||
|
||||
protected PrefixTreeBlockMeta blockMeta;
|
||||
|
||||
/*
|
||||
* Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and
|
||||
* compile before flushing.
|
||||
*/
|
||||
protected LongEncoder timestampEncoder;
|
||||
protected LongEncoder mvccVersionEncoder;
|
||||
protected CellTypeEncoder cellTypeEncoder;
|
||||
|
||||
/*
|
||||
* Structures used for collecting families and qualifiers, de-duplicating them, and sorting them
|
||||
* so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by
|
||||
* comparing only with the previous row key, families and qualifiers can arrive in unsorted order
|
||||
* in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them.
|
||||
*/
|
||||
protected ByteRangeSet familyDeduplicator;
|
||||
protected ByteRangeSet qualifierDeduplicator;
|
||||
protected ByteRangeSet tagsDeduplicator;
|
||||
/*
|
||||
* Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
|
||||
* trie structure with nodes connected by memory pointers (not serializable yet).
|
||||
*/
|
||||
protected Tokenizer rowTokenizer;
|
||||
protected Tokenizer familyTokenizer;
|
||||
protected Tokenizer qualifierTokenizer;
|
||||
protected Tokenizer tagsTokenizer;
|
||||
|
||||
/*
|
||||
* Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
|
||||
* all information to an output stream of bytes that can be stored on disk.
|
||||
*/
|
||||
protected RowSectionWriter rowWriter;
|
||||
protected ColumnSectionWriter familyWriter;
|
||||
protected ColumnSectionWriter qualifierWriter;
|
||||
protected ColumnSectionWriter tagsWriter;
|
||||
|
||||
/*
|
||||
* Integers used for counting cells and bytes. We keep track of the size of the Cells as if they
|
||||
* were full KeyValues because some parts of HBase like to know the "unencoded size".
|
||||
*/
|
||||
protected int totalCells = 0;
|
||||
protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
|
||||
protected int totalValueBytes = 0;
|
||||
protected int totalTagBytes = 0;
|
||||
protected int maxValueLength = 0;
|
||||
protected int maxTagLength = 0;
|
||||
protected int totalBytes = 0;//
|
||||
|
||||
|
||||
/***************** construct ***********************/
|
||||
|
||||
public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) {
|
||||
// used during cell accumulation
|
||||
this.blockMeta = new PrefixTreeBlockMeta();
|
||||
this.rowRange = new SimpleMutableByteRange();
|
||||
this.familyRange = new SimpleMutableByteRange();
|
||||
this.qualifierRange = new SimpleMutableByteRange();
|
||||
this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES];
|
||||
this.values = new byte[VALUE_BUFFER_INIT_SIZE];
|
||||
|
||||
// used during compilation
|
||||
this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
|
||||
: new ByteRangeTreeSet();
|
||||
this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
|
||||
: new ByteRangeTreeSet();
|
||||
this.timestampEncoder = new LongEncoder();
|
||||
this.mvccVersionEncoder = new LongEncoder();
|
||||
this.cellTypeEncoder = new CellTypeEncoder();
|
||||
this.rowTokenizer = new Tokenizer();
|
||||
this.familyTokenizer = new Tokenizer();
|
||||
this.qualifierTokenizer = new Tokenizer();
|
||||
this.rowWriter = new RowSectionWriter();
|
||||
this.familyWriter = new ColumnSectionWriter();
|
||||
this.qualifierWriter = new ColumnSectionWriter();
|
||||
initializeTagHelpers();
|
||||
|
||||
reset(outputStream, includeMvccVersion);
|
||||
}
|
||||
|
||||
public void reset(OutputStream outputStream, boolean includeMvccVersion) {
|
||||
++numResets;
|
||||
this.includeMvccVersion = includeMvccVersion;
|
||||
this.outputStream = outputStream;
|
||||
valueOffsets[0] = 0;
|
||||
familyDeduplicator.reset();
|
||||
qualifierDeduplicator.reset();
|
||||
tagsDeduplicator.reset();
|
||||
tagsWriter.reset();
|
||||
tagsTokenizer.reset();
|
||||
rowTokenizer.reset();
|
||||
timestampEncoder.reset();
|
||||
mvccVersionEncoder.reset();
|
||||
cellTypeEncoder.reset();
|
||||
familyTokenizer.reset();
|
||||
qualifierTokenizer.reset();
|
||||
rowWriter.reset();
|
||||
familyWriter.reset();
|
||||
qualifierWriter.reset();
|
||||
|
||||
totalCells = 0;
|
||||
totalUnencodedBytes = 0;
|
||||
totalValueBytes = 0;
|
||||
maxValueLength = 0;
|
||||
totalBytes = 0;
|
||||
}
|
||||
|
||||
protected void initializeTagHelpers() {
|
||||
this.tagsRange = new SimpleMutableByteRange();
|
||||
this.tagsDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
|
||||
: new ByteRangeTreeSet();
|
||||
this.tagsTokenizer = new Tokenizer();
|
||||
this.tagsWriter = new ColumnSectionWriter();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the arrays used to hold cell fragments are large enough for the cell that is being
|
||||
* added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
|
||||
* first few block encodings but should stabilize quickly.
|
||||
*/
|
||||
protected void ensurePerCellCapacities() {
|
||||
int currentCapacity = valueOffsets.length;
|
||||
int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe
|
||||
if (neededCapacity < currentCapacity) {
|
||||
return;
|
||||
}
|
||||
|
||||
int padding = neededCapacity;//this will double the array size
|
||||
timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding);
|
||||
mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding);
|
||||
typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding);
|
||||
valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding);
|
||||
}
|
||||
|
||||
/******************** CellOutputStream methods *************************/
|
||||
|
||||
/**
|
||||
* Note: Unused until support is added to the scanner/heap
|
||||
* <p/>
|
||||
* The following method are optimized versions of write(Cell cell). The result should be
|
||||
* identical, however the implementation may be able to execute them much more efficiently because
|
||||
* it does not need to compare the unchanged fields with the previous cell's.
|
||||
* <p/>
|
||||
* Consider the benefits during compaction when paired with a CellScanner that is also aware of
|
||||
* row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells
|
||||
* to the write(Cell cell) method.
|
||||
* <p/>
|
||||
* The savings of skipping duplicate row detection are significant with long row keys. A
|
||||
* DataBlockEncoder may store a row key once in combination with a count of how many cells are in
|
||||
* the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment
|
||||
* of the counter, and that is for every cell in the row.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Add a Cell to the output stream but repeat the previous row.
|
||||
*/
|
||||
//@Override
|
||||
public void writeWithRepeatRow(Cell cell) {
|
||||
ensurePerCellCapacities();//can we optimize away some of this?
|
||||
|
||||
//save a relatively expensive row comparison, incrementing the row's counter instead
|
||||
rowTokenizer.incrementNumOccurrencesOfLatestValue();
|
||||
addFamilyPart(cell);
|
||||
addQualifierPart(cell);
|
||||
addAfterRowFamilyQualifier(cell);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void write(Cell cell) {
|
||||
ensurePerCellCapacities();
|
||||
|
||||
rowTokenizer.addSorted(PrivateCellUtil.fillRowRange(cell, rowRange));
|
||||
addFamilyPart(cell);
|
||||
addQualifierPart(cell);
|
||||
addTagPart(cell);
|
||||
addAfterRowFamilyQualifier(cell);
|
||||
}
|
||||
|
||||
|
||||
private void addTagPart(Cell cell) {
|
||||
PrivateCellUtil.fillTagRange(cell, tagsRange);
|
||||
tagsDeduplicator.add(tagsRange);
|
||||
}
|
||||
|
||||
/***************** internal add methods ************************/
|
||||
|
||||
private void addAfterRowFamilyQualifier(Cell cell){
|
||||
// timestamps
|
||||
timestamps[totalCells] = cell.getTimestamp();
|
||||
timestampEncoder.add(cell.getTimestamp());
|
||||
|
||||
// memstore timestamps
|
||||
if (includeMvccVersion) {
|
||||
mvccVersions[totalCells] = cell.getSequenceId();
|
||||
mvccVersionEncoder.add(cell.getSequenceId());
|
||||
totalUnencodedBytes += WritableUtils.getVIntSize(cell.getSequenceId());
|
||||
}else{
|
||||
//must overwrite in case there was a previous version in this array slot
|
||||
mvccVersions[totalCells] = 0L;
|
||||
if(totalCells == 0){//only need to do this for the first cell added
|
||||
mvccVersionEncoder.add(0L);
|
||||
}
|
||||
//totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled
|
||||
}
|
||||
|
||||
// types
|
||||
typeBytes[totalCells] = cell.getTypeByte();
|
||||
cellTypeEncoder.add(cell.getTypeByte());
|
||||
|
||||
// values
|
||||
totalValueBytes += cell.getValueLength();
|
||||
// double the array each time we run out of space
|
||||
values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes);
|
||||
CellUtil.copyValueTo(cell, values, valueOffsets[totalCells]);
|
||||
if (cell.getValueLength() > maxValueLength) {
|
||||
maxValueLength = cell.getValueLength();
|
||||
}
|
||||
valueOffsets[totalCells + 1] = totalValueBytes;
|
||||
|
||||
// general
|
||||
totalUnencodedBytes += KeyValueUtil.length(cell);
|
||||
++totalCells;
|
||||
}
|
||||
|
||||
private void addFamilyPart(Cell cell) {
|
||||
if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) {
|
||||
PrivateCellUtil.fillFamilyRange(cell, familyRange);
|
||||
familyDeduplicator.add(familyRange);
|
||||
}
|
||||
}
|
||||
|
||||
private void addQualifierPart(Cell cell) {
|
||||
PrivateCellUtil.fillQualifierRange(cell, qualifierRange);
|
||||
qualifierDeduplicator.add(qualifierRange);
|
||||
}
|
||||
|
||||
|
||||
  /****************** compiling/flushing ********************/

  /**
   * Expensive method. The second half of the encoding work happens here.
   *
   * Take all the separate accumulated data structures and turn them into a single stream of bytes
   * which is written to the outputStream.
   */
  @Override
  public void flush() throws IOException {
    compile();

    // do the actual flushing to the output stream. Order matters.
    blockMeta.writeVariableBytesToOutputStream(outputStream);
    rowWriter.writeBytes(outputStream);
    familyWriter.writeBytes(outputStream);
    qualifierWriter.writeBytes(outputStream);
    tagsWriter.writeBytes(outputStream);
    timestampEncoder.writeBytes(outputStream);
    mvccVersionEncoder.writeBytes(outputStream);
    // CellType bytes are in the row nodes; there is no additional type section
    outputStream.write(values, 0, totalValueBytes);
  }

  /**
   * Now that all the cells have been added, do the work to reduce them to a series of byte[]
   * fragments that are ready to be written to the output stream.
   */
  protected void compile() {
    blockMeta.setNumKeyValueBytes(totalUnencodedBytes);
    int lastValueOffset = valueOffsets[totalCells];
    blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
    blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
    blockMeta.setNumValueBytes(totalValueBytes);
    totalBytes += totalTagBytes + totalValueBytes;

    // these compile methods will add to totalBytes
    compileTypes();
    compileMvccVersions();
    compileTimestamps();
    compileTags();
    compileQualifiers();
    compileFamilies();
    compileRows();

    int numMetaBytes = blockMeta.calculateNumMetaBytes();
    blockMeta.setNumMetaBytes(numMetaBytes);
    totalBytes += numMetaBytes;
  }

  /**
   * <p>
   * The following "compile" methods do any intermediate work necessary to transform the cell
   * fragments collected during the writing phase into structures that are ready to write to the
   * outputStream.
   * </p>
   * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion.
   */

  protected void compileTypes() {
    blockMeta.setAllSameType(cellTypeEncoder.areAllSameType());
    if (cellTypeEncoder.areAllSameType()) {
      blockMeta.setAllTypes(cellTypeEncoder.getOnlyType());
    }
  }

  protected void compileMvccVersions() {
    mvccVersionEncoder.compile();
    blockMeta.setMvccVersionFields(mvccVersionEncoder);
    int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength();
    totalBytes += numMvccVersionBytes;
  }

  protected void compileTimestamps() {
    timestampEncoder.compile();
    blockMeta.setTimestampFields(timestampEncoder);
    int numTimestampBytes = timestampEncoder.getOutputArrayLength();
    totalBytes += numTimestampBytes;
  }

  protected void compileQualifiers() {
    blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
    qualifierDeduplicator.compile();
    qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
    qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, ColumnNodeType.QUALIFIER);
    qualifierWriter.compile();
    int numQualifierBytes = qualifierWriter.getNumBytes();
    blockMeta.setNumQualifierBytes(numQualifierBytes);
    totalBytes += numQualifierBytes;
  }

  protected void compileFamilies() {
    blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
    familyDeduplicator.compile();
    familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
    familyWriter.reconstruct(blockMeta, familyTokenizer, ColumnNodeType.FAMILY);
    familyWriter.compile();
    int numFamilyBytes = familyWriter.getNumBytes();
    blockMeta.setNumFamilyBytes(numFamilyBytes);
    totalBytes += numFamilyBytes;
  }

  protected void compileTags() {
    blockMeta.setNumUniqueTags(tagsDeduplicator.size());
    tagsDeduplicator.compile();
    tagsTokenizer.addAll(tagsDeduplicator.getSortedRanges());
    tagsWriter.reconstruct(blockMeta, tagsTokenizer, ColumnNodeType.TAGS);
    tagsWriter.compile();
    int numTagBytes = tagsWriter.getNumBytes();
    blockMeta.setNumTagsBytes(numTagBytes);
    totalBytes += numTagBytes;
  }

  protected void compileRows() {
    rowWriter.reconstruct(this);
    rowWriter.compile();
    int numRowBytes = rowWriter.getNumBytes();
    blockMeta.setNumRowBytes(numRowBytes);
    blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth());
    totalBytes += numRowBytes;
  }

  /********************* convenience getters ********************************/

  public long getValueOffset(int index) {
    return valueOffsets[index];
  }

  public int getValueLength(int index) {
    return (int) (valueOffsets[index + 1] - valueOffsets[index]);
  }

  /************************* get/set *************************************/

  public PrefixTreeBlockMeta getBlockMeta() {
    return blockMeta;
  }

  public Tokenizer getRowTokenizer() {
    return rowTokenizer;
  }

  public LongEncoder getTimestampEncoder() {
    return timestampEncoder;
  }

  public int getTotalBytes() {
    return totalBytes;
  }

  public long[] getTimestamps() {
    return timestamps;
  }

  public long[] getMvccVersions() {
    return mvccVersions;
  }

  public byte[] getTypeBytes() {
    return typeBytes;
  }

  public LongEncoder getMvccVersionEncoder() {
    return mvccVersionEncoder;
  }

  public ByteRangeSet getFamilySorter() {
    return familyDeduplicator;
  }

  public ByteRangeSet getQualifierSorter() {
    return qualifierDeduplicator;
  }

  public ByteRangeSet getTagSorter() {
    return tagsDeduplicator;
  }

  public ColumnSectionWriter getFamilyWriter() {
    return familyWriter;
  }

  public ColumnSectionWriter getQualifierWriter() {
    return qualifierWriter;
  }

  public ColumnSectionWriter getTagWriter() {
    return tagsWriter;
  }

  public RowSectionWriter getRowWriter() {
    return rowWriter;
  }

  public ByteRange getValueByteRange() {
    return new SimpleMutableByteRange(values, 0, totalValueBytes);
  }

}
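Aside: the value-section bookkeeping above stores only cumulative offsets; getValueLength() recovers each length by subtracting adjacent entries of valueOffsets. A standalone sketch of that arithmetic (class name and sample values are illustrative, not from the removed code):

public class CumulativeOffsetDemo {
  public static void main(String[] args) {
    // valueOffsets[i] = start of value i; the extra last entry = total value bytes
    long[] valueOffsets = {0, 5, 5, 12}; // 3 values of lengths 5, 0, 7
    for (int i = 0; i < valueOffsets.length - 1; ++i) {
      long start = valueOffsets[i];
      int length = (int) (valueOffsets[i + 1] - valueOffsets[i]);
      System.out.println("value " + i + ": offset=" + start + ", length=" + length);
    }
  }
}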
@@ -1,136 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.column;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;

/**
 * <p>
 * Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly.
 * The family and qualifier sections of the data block are made of 1 or more of these nodes.
 * </p>
 * Each node is composed of 3 sections:<br>
 * <ul>
 * <li>tokenLength: UVInt (normally 1 byte) indicating the number of token bytes
 * <li>token[]: the actual token bytes
 * <li>parentStartPosition: the offset of the next node from the start of the family or qualifier
 * section
 * </ul>
 */
@InterfaceAudience.Private
public class ColumnNodeWriter {

  /************* fields ****************************/

  protected TokenizerNode builderNode;
  protected PrefixTreeBlockMeta blockMeta;

  protected int tokenLength;
  protected byte[] token;
  protected int parentStartPosition;
  protected ColumnNodeType nodeType;


  /*************** construct **************************/

  public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
      ColumnNodeType nodeType) {
    this.blockMeta = blockMeta;
    this.builderNode = builderNode;
    this.nodeType = nodeType;
    calculateTokenLength();
  }


  /************* methods *******************************/

  public boolean isRoot() {
    return parentStartPosition == 0;
  }

  private void calculateTokenLength() {
    tokenLength = builderNode.getTokenLength();
    token = new byte[tokenLength];
  }

  /**
   * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a
   * placeholder.
   * @param offsetWidthPlaceholder the placeholder
   * @return node width
   */
  public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) {
    int width = 0;
    width += UVIntTool.numBytes(tokenLength);
    width += token.length;
    width += offsetWidthPlaceholder;
    return width;
  }

  public void writeBytes(OutputStream os) throws IOException {
    int parentOffsetWidth;
    if (this.nodeType == ColumnNodeType.FAMILY) {
      parentOffsetWidth = blockMeta.getFamilyOffsetWidth();
    } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
      parentOffsetWidth = blockMeta.getQualifierOffsetWidth();
    } else {
      parentOffsetWidth = blockMeta.getTagsOffsetWidth();
    }
    UVIntTool.writeBytes(tokenLength, os);
    os.write(token);
    UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os);
  }

  public void setTokenBytes(ByteRange source) {
    source.deepCopySubRangeTo(0, tokenLength, token, 0);
  }


  /****************** standard methods ************************/

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ",");
    sb.append("[");
    sb.append(Bytes.toString(token));
    sb.append("]->");
    sb.append(parentStartPosition);
    return sb.toString();
  }


  /************************** get/set ***********************/

  public void setParentStartPosition(int parentStartPosition) {
    this.parentStartPosition = parentStartPosition;
  }

}
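Aside: the three-part node layout described in the class javadoc can be sketched in a few lines. The helpers below are simplified stand-ins for UVIntTool/UFIntTool, whose exact wire formats do not appear in this diff, so the varint and fixed-int encodings shown are assumptions:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

public class ColumnNodeLayoutDemo {
  // assumed varint: 7 bits per byte, high bit set on all but the final byte
  static void writeUVInt(int value, ByteArrayOutputStream os) {
    while (value >= 0x80) {
      os.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    os.write(value);
  }

  // assumed fixed-width unsigned int: big-endian, exactly numBytes wide,
  // which is what makes random access by offset possible
  static void writeUFInt(int numBytes, long value, ByteArrayOutputStream os) {
    for (int shift = 8 * (numBytes - 1); shift >= 0; shift -= 8) {
      os.write((int) (value >>> shift) & 0xFF);
    }
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    byte[] token = {'c', 'f'};
    int parentStartPosition = 0; // 0 marks the root, as in isRoot() above
    writeUVInt(token.length, os);           // section 1: tokenLength
    os.write(token);                        // section 2: token bytes
    writeUFInt(1, parentStartPosition, os); // section 3: parent offset
    System.out.println("node width: " + os.size() + " bytes"); // 1 + 2 + 1 = 4
  }
}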
@@ -1,209 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.column;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/**
 * <p>
 * Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family
 * section is written after the row section, and qualifier section after family section.
 * </p>
 * The family and qualifier tries, or "column tries", are structured differently than the row trie.
 * The trie cannot be reassembled without external data about the offsets of the leaf nodes, and
 * these external pointers are stored in the nubs and leaves of the row trie. For each cell in a
 * row, the row trie contains a list of offsets into the column sections (along with pointers to
 * timestamps and other per-cell fields). These offsets point to the last column node/token that
 * comprises the column name. To assemble the column name, the trie is traversed in reverse (right
 * to left), with the rightmost tokens pointing to the start of their "parent" node, which is the
 * node to the left.
 * <p>
 * This choice was made to reduce the size of the column trie by storing the minimum amount of
 * offset data. As a result, to find a specific qualifier within a row, you must do a binary search
 * of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might
 * encode the columns in both a forward and reverse trie, which would convert binary searches into
 * more efficient trie searches, a benefit for wide rows.
 * </p>
 */
@InterfaceAudience.Private
public class ColumnSectionWriter {

  public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;

  /****************** fields ****************************/

  private PrefixTreeBlockMeta blockMeta;

  private ColumnNodeType nodeType;
  private Tokenizer tokenizer;
  private int numBytes = 0;
  private ArrayList<TokenizerNode> nonLeaves;
  private ArrayList<TokenizerNode> leaves;
  private ArrayList<TokenizerNode> allNodes;
  private ArrayList<ColumnNodeWriter> columnNodeWriters;
  private List<Integer> outputArrayOffsets;


  /*********************** construct *********************/

  public ColumnSectionWriter() {
    this.nonLeaves = Lists.newArrayList();
    this.leaves = Lists.newArrayList();
    this.outputArrayOffsets = Lists.newArrayList();
  }

  public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
      ColumnNodeType nodeType) {
    this(); // init collections
    reconstruct(blockMeta, builder, nodeType);
  }

  public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
      ColumnNodeType nodeType) {
    this.blockMeta = blockMeta;
    this.tokenizer = builder;
    this.nodeType = nodeType;
  }

  public void reset() {
    numBytes = 0;
    nonLeaves.clear();
    leaves.clear();
    outputArrayOffsets.clear();
  }


  /****************** methods *******************************/

  public ColumnSectionWriter compile() {
    if (this.nodeType == ColumnNodeType.FAMILY) {
      // do nothing. max family length fixed at Byte.MAX_VALUE
    } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
      blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
    } else {
      blockMeta.setMaxTagsLength(tokenizer.getMaxElementLength());
    }
    compilerInternals();
    return this;
  }

  protected void compilerInternals() {
    tokenizer.setNodeFirstInsertionIndexes();
    tokenizer.appendNodes(nonLeaves, true, false);

    tokenizer.appendNodes(leaves, false, true);

    allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
    allNodes.addAll(nonLeaves);
    allNodes.addAll(leaves);

    columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
    for (int i = 0; i < allNodes.size(); ++i) {
      TokenizerNode node = allNodes.get(i);
      columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
    }

    // leaf widths are known at this point, so add them up
    int totalBytesWithoutOffsets = 0;
    for (int i = allNodes.size() - 1; i >= 0; --i) {
      ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
      // leaves store all but their first token byte
      totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
    }

    // figure out how wide our offset FInts are
    int parentOffsetWidth = 0;
    while (true) {
      ++parentOffsetWidth;
      int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
      if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
        numBytes = numBytesFinder;
        break;
      } // it fits
    }
    if (this.nodeType == ColumnNodeType.FAMILY) {
      blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
    } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
      blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
    } else {
      blockMeta.setTagsOffsetWidth(parentOffsetWidth);
    }

    int forwardIndex = 0;
    for (int i = 0; i < allNodes.size(); ++i) {
      TokenizerNode node = allNodes.get(i);
      ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
      int fullNodeWidth = columnNodeWriter
          .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
      node.setOutputArrayOffset(forwardIndex);
      columnNodeWriter.setTokenBytes(node.getToken());
      if (node.isRoot()) {
        columnNodeWriter.setParentStartPosition(0);
      } else {
        columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
      }
      forwardIndex += fullNodeWidth;
    }

    tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
  }

  public void writeBytes(OutputStream os) throws IOException {
    for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
      columnNodeWriter.writeBytes(os);
    }
  }


  /************* get/set **************************/

  public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
    return columnNodeWriters;
  }

  public int getNumBytes() {
    return numBytes;
  }

  public int getOutputArrayOffset(int sortedIndex) {
    return outputArrayOffsets.get(sortedIndex);
  }

  public ArrayList<TokenizerNode> getNonLeaves() {
    return nonLeaves;
  }

  public ArrayList<TokenizerNode> getLeaves() {
    return leaves;
  }

}
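Aside: the subtle step in compilerInternals() is the while (true) loop: widening the parent offsets also widens every node, so the minimal offset width is found by trial. A self-contained sketch of that search (maxValueForNumBytes is reimplemented here assuming unsigned big-endian fixed-width ints; the real UFIntTool is not shown in this diff):

public class OffsetWidthDemo {
  static long maxValueForNumBytes(int numBytes) {
    return (1L << (8 * numBytes)) - 1; // assumed unsigned fixed-width encoding
  }

  public static void main(String[] args) {
    int numNodes = 120;
    int totalBytesWithoutOffsets = 400;
    int width = 0;
    while (true) {
      ++width;
      // every node carries one offset, so the section grows with the width
      int candidate = totalBytesWithoutOffsets + width * numNodes;
      if (candidate < maxValueForNumBytes(width)) {
        // 520 does not fit in 1 byte (max 255), but 640 fits in 2 (max 65535)
        System.out.println("offsets fit in " + width + " byte(s), section=" + candidate);
        break;
      }
    }
  }
}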
@@ -1,68 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.other;

import org.apache.yetus.audience.InterfaceAudience;

/**
 * Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each
 * KV. If (allSameType) during conversion to byte[], then we can store the "onlyType" in blockMeta,
 * therefore not repeating it for each cell and saving 1 byte per cell.
 */
@InterfaceAudience.Private
public class CellTypeEncoder {

  /************* fields *********************/

  protected boolean pendingFirstType = true;
  protected boolean allSameType = true;
  protected byte onlyType;


  /************* construct *********************/

  public void reset() {
    pendingFirstType = true;
    allSameType = true;
  }


  /************* methods *************************/

  public void add(byte type) {
    if (pendingFirstType) {
      onlyType = type;
      pendingFirstType = false;
    } else if (onlyType != type) {
      allSameType = false;
    }
  }


  /**************** get/set **************************/

  public boolean areAllSameType() {
    return allSameType;
  }

  public byte getOnlyType() {
    return onlyType;
  }

}
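A quick usage sketch of the class above (the type codes are hypothetical placeholders; real callers pass KeyValue.Type codes, and this assumes it runs alongside CellTypeEncoder in the same package):

public class CellTypeEncoderDemo {
  public static void main(String[] args) {
    CellTypeEncoder encoder = new CellTypeEncoder();
    byte put = 4; // hypothetical stand-in for a KeyValue.Type code
    encoder.add(put);
    encoder.add(put);
    System.out.println(encoder.areAllSameType()); // true: 1 byte saved per cell
    encoder.add((byte) 8); // a different type code
    System.out.println(encoder.areAllSameType()); // false: types stored per cell
  }
}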
@@ -1,28 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.codec.prefixtree.encode.other;

import org.apache.yetus.audience.InterfaceAudience;

/**
 * Specifies the type of column node writer.
 */
@InterfaceAudience.Private
public enum ColumnNodeType {
  FAMILY, QUALIFIER, TAGS;
}
@@ -1,183 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.other;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;

import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;

/**
 * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
 * collection of Cells.
 *
 * 1. add longs to a HashSet for fast de-duplication
 * 2. keep track of the min and max
 * 3. copy all values to a new long[]
 * 4. Arrays.sort the long[]
 * 5. calculate maxDelta = max - min
 * 6. determine FInt width based on maxDelta
 * 7. PrefixTreeEncoder binary searches to find index of each value
 */
@InterfaceAudience.Private
public class LongEncoder {

  /****************** fields ****************************/

  protected HashSet<Long> uniqueValues;
  protected long[] sortedUniqueValues;
  protected long min, max, maxDelta;

  protected int bytesPerDelta;
  protected int bytesPerIndex;
  protected int totalCompressedBytes;


  /****************** construct ****************************/

  public LongEncoder() {
    this.uniqueValues = new HashSet<>();
  }

  public void reset() {
    uniqueValues.clear();
    sortedUniqueValues = null;
    min = Long.MAX_VALUE;
    max = Long.MIN_VALUE;
    maxDelta = Long.MIN_VALUE;
    bytesPerIndex = 0;
    bytesPerDelta = 0;
    totalCompressedBytes = 0;
  }


  /************* methods ***************************/

  public void add(long timestamp) {
    uniqueValues.add(timestamp);
  }

  public LongEncoder compile() {
    int numUnique = uniqueValues.size();
    if (numUnique == 1) {
      min = CollectionUtils.getFirst(uniqueValues);
      sortedUniqueValues = new long[] { min };
      return this;
    }

    sortedUniqueValues = new long[numUnique];
    int lastIndex = -1;
    for (long value : uniqueValues) {
      sortedUniqueValues[++lastIndex] = value;
    }
    Arrays.sort(sortedUniqueValues);
    min = ArrayUtils.getFirst(sortedUniqueValues);
    max = ArrayUtils.getLast(sortedUniqueValues);
    maxDelta = max - min;
    if (maxDelta > 0) {
      bytesPerDelta = UFIntTool.numBytes(maxDelta);
    } else {
      bytesPerDelta = 0;
    }

    int maxIndex = numUnique - 1;
    bytesPerIndex = UFIntTool.numBytes(maxIndex);

    totalCompressedBytes = numUnique * bytesPerDelta;

    return this;
  }

  public long getDelta(int index) {
    if (sortedUniqueValues.length == 0) {
      return 0;
    }
    return sortedUniqueValues[index] - min;
  }

  public int getIndex(long value) {
    // should always find an exact match
    return Arrays.binarySearch(sortedUniqueValues, value);
  }

  public void writeBytes(OutputStream os) throws IOException {
    for (int i = 0; i < sortedUniqueValues.length; ++i) {
      long delta = sortedUniqueValues[i] - min;
      UFIntTool.writeBytes(bytesPerDelta, delta, os);
    }
  }

  // convenience method for tests
  public byte[] getByteArray() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    writeBytes(baos);
    return baos.toByteArray();
  }

  public int getOutputArrayLength() {
    return sortedUniqueValues.length * bytesPerDelta;
  }

  public int getNumUniqueValues() {
    return sortedUniqueValues.length;
  }


  /******************* Object methods **********************/

  @Override
  public String toString() {
    if (ArrayUtils.isEmpty(sortedUniqueValues)) {
      return "[]";
    }
    return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
  }


  /******************** get/set **************************/

  public long getMin() {
    return min;
  }

  public int getBytesPerDelta() {
    return bytesPerDelta;
  }

  public int getBytesPerIndex() {
    return bytesPerIndex;
  }

  public int getTotalCompressedBytes() {
    return totalCompressedBytes;
  }

  public long[] getSortedUniqueTimestamps() {
    return sortedUniqueValues;
  }

}
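Aside: the seven-step recipe in the class javadoc condenses to a few lines. A standalone sketch (numBytes is reimplemented here under the assumption that UFIntTool counts the minimum whole bytes needed to hold an unsigned value):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class DeltaWidthDemo {
  // minimum whole bytes to hold an unsigned value (assumed UFIntTool semantics)
  static int numBytes(long value) {
    int n = 1;
    while ((value >>>= 8) != 0) {
      ++n;
    }
    return n;
  }

  public static void main(String[] args) {
    long[] timestamps = {1500000000000L, 1500000000000L, 1500000005000L};
    Set<Long> unique = new HashSet<>();
    for (long t : timestamps) {
      unique.add(t); // step 1: de-duplicate
    }
    long[] sorted = unique.stream().mapToLong(Long::longValue).toArray();
    Arrays.sort(sorted); // steps 3-4: copy out and sort
    long min = sorted[0];
    long maxDelta = sorted[sorted.length - 1] - min; // step 5
    // step 6: a delta of 5,000 fits in 2 bytes instead of 8 per raw timestamp
    System.out.println("unique=" + sorted.length + ", bytesPerDelta=" + numBytes(maxDelta));
  }
}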
@@ -1,300 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.row;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;

/**
 * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
 * Please see the write() method for the order in which data is written.
 */
@InterfaceAudience.Private
public class RowNodeWriter {
  protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);

  /********************* fields ******************************/

  protected PrefixTreeEncoder prefixTreeEncoder;
  protected PrefixTreeBlockMeta blockMeta;
  protected TokenizerNode tokenizerNode;

  protected int tokenWidth;
  protected int fanOut;
  protected int numCells;

  protected int width;


  /*********************** construct *************************/

  public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
    reconstruct(keyValueBuilder, tokenizerNode);
  }

  public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
    this.prefixTreeEncoder = prefixTreeEncoder;
    reset(tokenizerNode);
  }

  public void reset(TokenizerNode node) {
    this.blockMeta = prefixTreeEncoder.getBlockMeta(); // changes between blocks
    this.tokenizerNode = node;
    this.tokenWidth = 0;
    this.fanOut = 0;
    this.numCells = 0;
    this.width = 0;
    calculateOffsetsAndLengths();
  }


  /********************* methods ****************************/

  protected void calculateOffsetsAndLengths() {
    tokenWidth = tokenizerNode.getTokenLength();
    if (!tokenizerNode.isRoot()) {
      --tokenWidth; // root has no parent
    }
    fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
    numCells = tokenizerNode.getNumOccurrences();
  }

  public int calculateWidth() {
    calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
    return width;
  }

  public int calculateWidthOverrideOffsetWidth(int offsetWidth) {
    width = 0;
    width += UVIntTool.numBytes(tokenWidth);
    width += tokenWidth;

    width += UVIntTool.numBytes(fanOut);
    width += fanOut;

    width += UVIntTool.numBytes(numCells);

    if (tokenizerNode.hasOccurrences()) {
      int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
          + blockMeta.getQualifierOffsetWidth()
          + blockMeta.getTagsOffsetWidth()
          + blockMeta.getTimestampIndexWidth()
          + blockMeta.getMvccVersionIndexWidth()
          + blockMeta.getKeyValueTypeWidth()
          + blockMeta.getValueOffsetWidth()
          + blockMeta.getValueLengthWidth();
      width += numCells * fixedBytesPerCell;
    }

    if (!tokenizerNode.isLeaf()) {
      width += fanOut * offsetWidth;
    }

    return width;
  }


  /*********************** writing the compiled structure to the OutputStream ***************/

  public void write(OutputStream os) throws IOException {
    // info about this row trie node
    writeRowToken(os);
    writeFan(os);
    writeNumCells(os);

    // UFInt indexes and offsets for each cell in the row (if nub or leaf)
    writeFamilyNodeOffsets(os);
    writeQualifierNodeOffsets(os);
    writeTagNodeOffsets(os);
    writeTimestampIndexes(os);
    writeMvccVersionIndexes(os);
    writeCellTypes(os);
    writeValueOffsets(os);
    writeValueLengths(os);
    // offsets to the children of this row trie node (if branch or nub)
    writeNextRowTrieNodeOffsets(os);
  }


  /**
   * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
   * fields can reproduce all the row keys that compose the block.
   */

  /**
   * UVInt: tokenWidth
   * bytes: token
   */
  protected void writeRowToken(OutputStream os) throws IOException {
    UVIntTool.writeBytes(tokenWidth, os);
    int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
    ByteRangeUtils.write(os, tokenizerNode.getToken(), tokenStartIndex);
  }

  /**
   * UVInt: numFanBytes/fanOut
   * bytes: each fan byte
   */
  public void writeFan(OutputStream os) throws IOException {
    UVIntTool.writeBytes(fanOut, os);
    if (fanOut <= 0) {
      return;
    }
    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      os.write(child.getToken().get(0)); // first byte of each child's token
    }
  }

  /**
   * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes
   */
  protected void writeNumCells(OutputStream os) throws IOException {
    UVIntTool.writeBytes(numCells, os);
  }


  /**
   * The following methods write data for each cell in the row, mostly consisting of indexes or
   * offsets into the timestamp/column data structures that are written in the middle of the block.
   * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
   * search of a particular column/timestamp combination.
   * <p>
   * Branch nodes will not have any data in these sections.
   * </p>
   */

  protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
    if (blockMeta.getFamilyOffsetWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
          .getFirstInsertionIndex() + i : 0;
      int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
          cellInsertionIndex);
      int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
          sortedIndex);
      UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
    }
  }

  protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
    if (blockMeta.getQualifierOffsetWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
          cellInsertionIndex);
      int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
          sortedIndex);
      UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
    }
  }

  protected void writeTagNodeOffsets(OutputStream os) throws IOException {
    if (blockMeta.getTagsOffsetWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      int sortedIndex = prefixTreeEncoder.getTagSorter().getSortedIndexForInsertionId(
          cellInsertionIndex);
      int indexedTagOffset = prefixTreeEncoder.getTagWriter().getOutputArrayOffset(
          sortedIndex);
      UFIntTool.writeBytes(blockMeta.getTagsOffsetWidth(), indexedTagOffset, os);
    }
  }

  protected void writeTimestampIndexes(OutputStream os) throws IOException {
    if (blockMeta.getTimestampIndexWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
      int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
      UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
    }
  }

  protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
    if (blockMeta.getMvccVersionIndexWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
      int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
      UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
    }
  }

  protected void writeCellTypes(OutputStream os) throws IOException {
    if (blockMeta.isAllSameType()) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
    }
  }

  protected void writeValueOffsets(OutputStream os) throws IOException {
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
      UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
    }
  }

  protected void writeValueLengths(OutputStream os) throws IOException {
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
      UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
    }
  }

  /**
   * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
   */
  protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
      UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
    }
  }
}
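Aside: calculateWidthOverrideOffsetWidth() above is just arithmetic, and a worked example makes the layout concrete. The per-field widths below are made up (real values come from PrefixTreeBlockMeta after the column/timestamp sections compile):

public class RowNodeWidthDemo {
  // assumed single-byte varint for small values, matching "normally 1 byte"
  static int uvIntNumBytes(int v) {
    return v < 0x80 ? 1 : 2;
  }

  public static void main(String[] args) {
    int tokenWidth = 3, fanOut = 2, numCells = 2, offsetWidth = 2;
    int fixedBytesPerCell = 1  // family offset
                          + 2  // qualifier offset
                          + 0  // tags offset (block has no tags)
                          + 1  // timestamp index
                          + 0  // mvcc index (all versions identical)
                          + 0  // cell type (all same KeyValue.Type)
                          + 2  // value offset
                          + 1; // value length
    int width = uvIntNumBytes(tokenWidth) + tokenWidth
        + uvIntNumBytes(fanOut) + fanOut
        + uvIntNumBytes(numCells)
        + numCells * fixedBytesPerCell
        + fanOut * offsetWidth; // child pointers: branch/nub only
    System.out.println("row node width: " + width + " bytes"); // 26
  }
}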
@@ -1,219 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.row;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.vint.UFIntTool;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/**
 * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
 * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
 * also contains references to offsets in the other sections of the data block that enable the
 * decoder to match a row key with its qualifier, timestamp, type, value, etc.
 * <p>
 * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
 * internals of each row node.
 */
@InterfaceAudience.Private
public class RowSectionWriter {

  /***************** fields **************************/

  protected PrefixTreeEncoder prefixTreeEncoder;

  protected PrefixTreeBlockMeta blockMeta;

  protected int numBytes;

  protected ArrayList<TokenizerNode> nonLeaves;
  protected ArrayList<TokenizerNode> leaves;

  protected ArrayList<RowNodeWriter> leafWriters;
  protected ArrayList<RowNodeWriter> nonLeafWriters;

  protected int numLeafWriters;
  protected int numNonLeafWriters;


  /********************* construct **********************/

  public RowSectionWriter() {
    this.nonLeaves = Lists.newArrayList();
    this.leaves = Lists.newArrayList();
    this.leafWriters = Lists.newArrayList();
    this.nonLeafWriters = Lists.newArrayList();
  }

  public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
    reconstruct(prefixTreeEncoder);
  }

  public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
    this.prefixTreeEncoder = prefixTreeEncoder;
    this.blockMeta = prefixTreeEncoder.getBlockMeta();
    reset();
  }

  public void reset() {
    numBytes = 0;
    nonLeaves.clear();
    leaves.clear();
    numLeafWriters = 0;
    numNonLeafWriters = 0;
  }


  /****************** methods *******************************/

  public RowSectionWriter compile() {
    blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
    prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();

    prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
    prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);

    // track the starting position of each node in final output
    int negativeIndex = 0;

    // create leaf writer nodes
    // leaf widths are known at this point, so add them up
    int totalLeafBytes = 0;
    for (int i = leaves.size() - 1; i >= 0; --i) {
      TokenizerNode leaf = leaves.get(i);
      RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
      ++numLeafWriters;
      // leaves store all but their first token byte
      int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
      totalLeafBytes += leafNodeWidth;
      negativeIndex += leafNodeWidth;
      leaf.setNegativeIndex(negativeIndex);
    }

    int totalNonLeafBytesWithoutOffsets = 0;
    int totalChildPointers = 0;
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
      TokenizerNode nonLeaf = nonLeaves.get(i);
      RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
      ++numNonLeafWriters;
      totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
      totalChildPointers += nonLeaf.getNumChildren();
    }

    // figure out how wide our offset FInts are
    int offsetWidth = 0;
    while (true) {
      ++offsetWidth;
      int offsetBytes = totalChildPointers * offsetWidth;
      int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
      if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
        // it fits
        numBytes = totalRowBytes;
        break;
      }
    }
    blockMeta.setNextNodeOffsetWidth(offsetWidth);

    // populate negativeIndexes
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
      TokenizerNode nonLeaf = nonLeaves.get(i);
      int writerIndex = nonLeaves.size() - i - 1;
      RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
      int nodeWidth = nonLeafWriter.calculateWidth();
      negativeIndex += nodeWidth;
      nonLeaf.setNegativeIndex(negativeIndex);
    }

    return this;
  }

  protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
      TokenizerNode builderNode) {
    RowNodeWriter rowNodeWriter = null;
    // check if there is an existing node we can recycle
    if (index >= list.size()) {
      // there are not enough existing nodes, so add a new one which will be retrieved below
      list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
    }
    rowNodeWriter = list.get(index);
    rowNodeWriter.reset(builderNode);
    return rowNodeWriter;
  }


  public void writeBytes(OutputStream os) throws IOException {
    for (int i = numNonLeafWriters - 1; i >= 0; --i) {
      RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
      nonLeafWriter.write(os);
    }
    // duplicates above... written more for clarity right now
    for (int i = numLeafWriters - 1; i >= 0; --i) {
      RowNodeWriter leafWriter = leafWriters.get(i);
      leafWriter.write(os);
    }
  }


  /***************** static ******************************/

  protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
      ArrayList<TokenizerNode> ins, boolean leaves) {
    ArrayList<TokenizerNode> outs = Lists.newArrayList();
    for (int i = ins.size() - 1; i >= 0; --i) {
      TokenizerNode n = ins.get(i);
      if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) {
        outs.add(ins.get(i));
      }
    }
    return outs;
  }


  /************* get/set **************************/

  public int getNumBytes() {
    return numBytes;
  }

  public ArrayList<TokenizerNode> getNonLeaves() {
    return nonLeaves;
  }

  public ArrayList<TokenizerNode> getLeaves() {
    return leaves;
  }

  public ArrayList<RowNodeWriter> getNonLeafWriters() {
    return nonLeafWriters;
  }

  public ArrayList<RowNodeWriter> getLeafWriters() {
    return leafWriters;
  }

}
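Aside: the negativeIndex bookkeeping in compile() is easiest to see with numbers. Nodes are measured backwards from the end of the row section, leaves first, so the child pointer that RowNodeWriter.writeNextRowTrieNodeOffsets() emits is simply the difference of two negative indexes. A tiny sketch with made-up widths:

public class NegativeIndexDemo {
  public static void main(String[] args) {
    int leafWidth = 10, parentWidth = 7; // illustrative node widths
    int negativeIndex = 0;
    negativeIndex += leafWidth;   // leaf starts 10 bytes before section end
    int leafNegativeIndex = negativeIndex;
    negativeIndex += parentWidth; // parent starts 17 bytes before section end
    int parentNegativeIndex = negativeIndex;
    // the UFInt child pointer the parent writes:
    System.out.println("distance to child: " + (parentNegativeIndex - leafNegativeIndex));
  }
}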
@@ -1,241 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;

import java.util.ArrayList;
import java.util.List;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/**
 * Data structure used in the first stage of PrefixTree encoding:
 * <ul>
 * <li>accepts a sorted stream of ByteRanges
 * <li>splits them into a set of tokens, each held by a {@link TokenizerNode}
 * <li>connects the TokenizerNodes via standard java references
 * <li>keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content
 * </ul>
 * <p><br>
 * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier
 * encoding.
 */
@InterfaceAudience.Private
public class Tokenizer {

  /***************** fields **************************/

  protected int numArraysAdded = 0;
  protected long lastNodeId = -1;
  protected ArrayList<TokenizerNode> nodes;
  protected int numNodes;
  protected TokenizerNode root;
  protected byte[] tokens;
  protected int tokensLength;

  protected int maxElementLength = 0;
  // number of levels in the tree assuming root level is 0
  protected int treeDepth = 0;


  /******************* construct *******************/

  public Tokenizer() {
    this.nodes = Lists.newArrayList();
    this.tokens = new byte[0];
  }

  public void reset() {
    numArraysAdded = 0;
    lastNodeId = -1;
    numNodes = 0;
    tokensLength = 0;
    root = null;
    maxElementLength = 0;
    treeDepth = 0;
  }


  /***************** building *************************/

  public void addAll(ArrayList<ByteRange> sortedByteRanges) {
    for (int i = 0; i < sortedByteRanges.size(); ++i) {
      ByteRange byteRange = sortedByteRanges.get(i);
      addSorted(byteRange);
    }
  }

  public void addSorted(final ByteRange bytes) {
    ++numArraysAdded;
    if (bytes.getLength() > maxElementLength) {
      maxElementLength = bytes.getLength();
    }
    if (root == null) {
      // nodeDepth of firstNode (non-root) is 1
      root = addNode(null, 1, 0, bytes, 0);
    } else {
      root.addSorted(bytes);
    }
  }

  public void incrementNumOccurrencesOfLatestValue() {
    CollectionUtils.getLast(nodes).incrementNumOccurrences(1);
  }

  protected long nextNodeId() {
    return ++lastNodeId;
  }

  protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset,
      final ByteRange token, int inputTokenOffset) {
    int inputTokenLength = token.getLength() - inputTokenOffset;
    int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset);
    TokenizerNode node = null;
    if (nodes.size() <= numNodes) {
      node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset,
          inputTokenLength);
      nodes.add(node);
    } else {
      node = nodes.get(numNodes);
      node.reset();
      node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength);
    }
    ++numNodes;
    return node;
  }

  protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) {
    int newOffset = tokensLength;
    int inputTokenLength = token.getLength() - inputTokenOffset;
    int newMinimum = tokensLength + inputTokenLength;
    tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum);
    token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength);
    tokensLength += inputTokenLength;
    return newOffset;
  }

  protected void submitMaxNodeDepthCandidate(int nodeDepth) {
    if (nodeDepth > treeDepth) {
      treeDepth = nodeDepth;
    }
  }


  /********************* read ********************/

  public int getNumAdded() {
    return numArraysAdded;
  }

  // for debugging
  public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves) {
    ArrayList<TokenizerNode> nodes = Lists.newArrayList();
    root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves);
    return nodes;
  }

  public void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves,
      boolean includeLeaves) {
    root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
  }

  public List<byte[]> getArrays() {
    List<TokenizerNode> nodes = new ArrayList<>();
    root.appendNodesToExternalList(nodes, true, true);
    List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes));
    for (int i = 0; i < nodes.size(); ++i) {
      TokenizerNode node = nodes.get(i);
      for (int j = 0; j < node.getNumOccurrences(); ++j) {
        byte[] byteArray = node.getNewByteArray();
        byteArrays.add(byteArray);
      }
    }
    return byteArrays;
  }

  // currently unused, but working and possibly useful in the future
  public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
      int keyLength) {
    root.getNode(resultHolder, key, keyOffset, keyLength);
  }


  /********************** write ***************************/

  public Tokenizer setNodeFirstInsertionIndexes() {
    root.setInsertionIndexes(0);
    return this;
  }

  public Tokenizer appendOutputArrayOffsets(List<Integer> offsets) {
    root.appendOutputArrayOffsets(offsets);
    return this;
  }


  /********************* print/debug ********************/

  protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false;

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getStructuralString());
    if (INCLUDE_FULL_TREE_IN_TO_STRING) {
      for (byte[] bytes : getArrays()) {
        if (sb.length() > 0) {
          sb.append("\n");
        }
        sb.append(Bytes.toString(bytes));
      }
    }
    return sb.toString();
  }

  public String getStructuralString() {
    List<TokenizerNode> nodes = getNodes(true, true);
    StringBuilder sb = new StringBuilder();
    for (TokenizerNode node : nodes) {
      String line = node.getPaddedTokenAndOccurrenceString();
      sb.append(line + "\n");
    }
    return sb.toString();
  }


  /****************** get/set ************************/

  public TokenizerNode getRoot() {
    return root;
  }

  public int getMaxElementLength() {
    return maxElementLength;
  }

  public int getTreeDepth() {
    return treeDepth;
  }

}
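Aside: because inputs arrive sorted, addSorted() only ever compares a new element against the rightmost path of the trie, and a node splits at the longest common prefix. A minimal sketch of that split decision (this is the idea only, not the removed addSorted/TokenizerNode logic; compare the AAA/AAB worked example in TokenizerNode's javadoc below):

import java.util.Arrays;

public class PrefixSplitDemo {
  static int commonPrefixLength(byte[] a, byte[] b) {
    int limit = Math.min(a.length, b.length);
    int i = 0;
    while (i < limit && a[i] == b[i]) {
      ++i;
    }
    return i;
  }

  public static void main(String[] args) {
    byte[] prev = "AAA".getBytes();
    byte[] next = "AABQQ".getBytes();
    int lcp = commonPrefixLength(prev, next);
    // "AA" becomes a shared branch token; "BQQ" becomes a new suffix node
    System.out.println("shared token: " + new String(Arrays.copyOf(prev, lcp)));
    System.out.println("new suffix: " + new String(next).substring(lcp));
  }
}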
@@ -1,639 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;

import java.util.ArrayList;
import java.util.List;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.Strings;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/**
 * Individual node in a Trie structure. Each node is one of 3 types:
 * <ul>
 * <li>Branch: an internal trie node that may have a token and must have multiple children, but does
 * not represent an actual input byte[], hence its numOccurrences is 0
 * <li>Leaf: a node with no children and where numOccurrences is >= 1. Its token represents the
 * last bytes in the input byte[]s.
 * <li>Nub: a combination of a branch and leaf. Its token represents the last bytes of input
 * byte[]s and has numOccurrences >= 1, but it also has child nodes which represent input byte[]s
 * that add bytes to this node's input byte[].
 * </ul>
 * <br><br>
 * Example inputs (numInputs=7):
 * 0: AAA
 * 1: AAA
 * 2: AAB
 * 3: AAB
 * 4: AAB
 * 5: AABQQ
 * 6: AABQQ
 * <br><br>
 * Resulting TokenizerNodes:
 * AA <- branch, numOccurrences=0, tokenStartOffset=0, token.length=2
 * A <- leaf, numOccurrences=2, tokenStartOffset=2, token.length=1
 * B <- nub, numOccurrences=3, tokenStartOffset=2, token.length=1
 * QQ <- leaf, numOccurrences=2, tokenStartOffset=3, token.length=2
 * <br><br>
 * numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2
 */
@InterfaceAudience.Private
public class TokenizerNode {

  /*
   * Ref to data structure wrapper
   */
  protected Tokenizer builder;

  /******************************************************************
   * Tree content/structure used during tokenization
   ******************************************************************/

  /*
   * ref to parent trie node
   */
  protected TokenizerNode parent;

  /*
   * node depth in trie, irrespective of each node's token length
   */
  protected int nodeDepth;

  /*
   * start index of this token in original byte[]
   */
  protected int tokenStartOffset;

  /*
   * bytes for this trie node. can be length 0 in root node
   */
  protected ByteRange token;

  /*
   * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for
   * nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing
   * that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode.
   */
  protected int numOccurrences;

  /*
   * The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256
   * child nodes.
   */
  protected ArrayList<TokenizerNode> children;


  /*
   * Fields used later in the encoding process for sorting the nodes into the order they'll be
   * written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer
   * are not generic data structures but instead are specific to HBase PrefixTree encoding.
   */

  /*
   * unique id assigned to each TokenizerNode
   */
  protected long id;

  /*
   * set >=0 for nubs and leaves
   */
  protected int firstInsertionIndex = -1;

  /*
   * A positive value indicating how many bytes before the end of the block this node will start. If
   * the section is 55 bytes and negativeOffset is 9, then the node will start at 46.
   */
  protected int negativeIndex = 0;

  /*
   * The offset in the output array at which to start writing this node's token bytes. Influenced
|
||||
* by the lengths of all tokens sorted before this one.
|
||||
*/
|
||||
protected int outputArrayOffset = -1;
|
||||
|
||||
|
||||
/*********************** construct *****************************/
|
||||
|
||||
public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
|
||||
int tokenStartOffset, int tokenOffset, int tokenLength) {
|
||||
this.token = new SimpleMutableByteRange();
|
||||
reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
|
||||
this.children = Lists.newArrayList();
|
||||
}
|
||||
|
||||
/*
|
||||
* Sub-constructor for initializing all fields without allocating a new object. Used by the
|
||||
* regular constructor.
|
||||
*/
|
||||
public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
|
||||
int tokenStartOffset, int tokenOffset, int tokenLength) {
|
||||
this.builder = builder;
|
||||
this.id = builder.nextNodeId();
|
||||
this.parent = parent;
|
||||
this.nodeDepth = nodeDepth;
|
||||
builder.submitMaxNodeDepthCandidate(nodeDepth);
|
||||
this.tokenStartOffset = tokenStartOffset;
|
||||
this.token.set(builder.tokens, tokenOffset, tokenLength);
|
||||
this.numOccurrences = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the state of this node so that it looks like it was just allocated.
|
||||
*/
|
||||
public void reset() {
|
||||
builder = null;
|
||||
parent = null;
|
||||
nodeDepth = 0;
|
||||
tokenStartOffset = 0;
|
||||
token.unset();
|
||||
numOccurrences = 0;
|
||||
children.clear();// branches & nubs
|
||||
|
||||
// ids/offsets. used during writing to byte[]
|
||||
id = 0;
|
||||
firstInsertionIndex = -1;// set >=0 for nubs and leaves
|
||||
negativeIndex = 0;
|
||||
outputArrayOffset = -1;
|
||||
}
|
||||
|
||||
|
||||
/************************* building *********************************/
|
||||
|
||||
/*
|
||||
* <li>Only public method used during the tokenization process
|
||||
* <li>Requires that the input ByteRange sort after the previous, and therefore after all previous
|
||||
* inputs
|
||||
* <li>Only looks at bytes of the input array that align with this node's token
|
||||
*/
|
||||
public void addSorted(final ByteRange bytes) {// recursively build the tree
|
||||
|
||||
/*
|
||||
* Recurse deeper into the existing trie structure
|
||||
*/
|
||||
if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
|
||||
TokenizerNode lastChild = CollectionUtils.getLast(children);
|
||||
if (lastChild.partiallyMatchesToken(bytes)) {
|
||||
lastChild.addSorted(bytes);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Recursion ended. We must either
|
||||
* <li>1: increment numOccurrences if this input was equal to the previous
|
||||
* <li>2: convert this node from a leaf to a nub, and add a new child leaf
|
||||
* <li>3: split this node into a branch and leaf, and then add a second leaf
|
||||
*/
|
||||
|
||||
// add it as a child of this node
|
||||
int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length
|
||||
int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
|
||||
int tailLength = bytes.getLength() - tailOffset;
|
||||
|
||||
if (numIdenticalTokenBytes == token.getLength()) {
|
||||
if (tailLength == 0) {// identical to this node (case 1)
|
||||
incrementNumOccurrences(1);
|
||||
} else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2)
|
||||
int childNodeDepth = nodeDepth + 1;
|
||||
int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
|
||||
TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
|
||||
bytes, tailOffset);
|
||||
addChild(newChildNode);
|
||||
}
|
||||
} else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3)
|
||||
split(numIdenticalTokenBytes, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void addChild(TokenizerNode node) {
|
||||
node.setParent(this);
|
||||
children.add(node);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the
|
||||
* method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output
|
||||
* will be 3 nodes:<br>
|
||||
* <ul>
|
||||
* <li>1: B <- branch
|
||||
* <li>2: AA <- leaf
|
||||
* <li>3: OO <- leaf
|
||||
* </ul>
|
||||
*
|
||||
* @param numTokenBytesToRetain => 1 (the B)
|
||||
* @param bytes => BOO
|
||||
*/
|
||||
protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
|
||||
int childNodeDepth = nodeDepth;
|
||||
int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;
|
||||
|
||||
//create leaf AA
|
||||
TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
|
||||
token, numTokenBytesToRetain);
|
||||
firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences
|
||||
token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B
|
||||
numOccurrences = 0;//current node is now a branch
|
||||
|
||||
moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B)
|
||||
addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children
|
||||
|
||||
//create leaf OO
|
||||
TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
|
||||
bytes, tokenStartOffset + numTokenBytesToRetain);
|
||||
addChild(secondChild);//add the new leaf (00) to the branch's (B's) children
|
||||
|
||||
// we inserted branch node B as a new level above/before the two children, so increment the
|
||||
// depths of the children below
|
||||
firstChild.incrementNodeDepthRecursively();
|
||||
secondChild.incrementNodeDepthRecursively();
|
||||
}
|
||||
|
||||
|
||||
protected void incrementNodeDepthRecursively() {
|
||||
++nodeDepth;
|
||||
builder.submitMaxNodeDepthCandidate(nodeDepth);
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
children.get(i).incrementNodeDepthRecursively();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
child.setParent(newParent);
|
||||
newParent.children.add(child);
|
||||
}
|
||||
children.clear();
|
||||
}
|
||||
|
||||
|
||||
/************************ byte[] utils *************************/
|
||||
|
||||
protected boolean partiallyMatchesToken(ByteRange bytes) {
|
||||
return numIdenticalBytes(bytes) > 0;
|
||||
}
|
||||
|
||||
protected boolean matchesToken(ByteRange bytes) {
|
||||
return numIdenticalBytes(bytes) == getTokenLength();
|
||||
}
|
||||
|
||||
protected int numIdenticalBytes(ByteRange bytes) {
|
||||
return ByteRangeUtils.numEqualPrefixBytes(token, bytes, tokenStartOffset);
|
||||
}
|
||||
|
||||
|
||||
/***************** moving nodes around ************************/
|
||||
|
||||
public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
|
||||
boolean includeLeaves) {
|
||||
if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
|
||||
appendTo.add(this);
|
||||
}
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
|
||||
}
|
||||
}
|
||||
|
||||
public int setInsertionIndexes(int nextIndex) {
|
||||
int newNextIndex = nextIndex;
|
||||
if (hasOccurrences()) {
|
||||
setFirstInsertionIndex(nextIndex);
|
||||
newNextIndex += numOccurrences;
|
||||
}
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
newNextIndex = child.setInsertionIndexes(newNextIndex);
|
||||
}
|
||||
return newNextIndex;
|
||||
}
|
||||
|
||||
public void appendOutputArrayOffsets(List<Integer> offsets) {
|
||||
if (hasOccurrences()) {
|
||||
offsets.add(outputArrayOffset);
|
||||
}
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
child.appendOutputArrayOffsets(offsets);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/***************** searching *********************************/
|
||||
|
||||
/*
|
||||
* Do a trie style search through the tokenizer. One option for looking up families or qualifiers
|
||||
* during encoding, but currently unused in favor of tracking this information as they are added.
|
||||
*
|
||||
* Keeping code pending further performance testing.
|
||||
*/
|
||||
public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
|
||||
int keyLength) {
|
||||
int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();
|
||||
|
||||
// quick check if the key is shorter than this node (may not work for binary search)
|
||||
if (CollectionUtils.isEmpty(children)) {
|
||||
if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes
|
||||
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// all token bytes must match
|
||||
for (int i = 0; i < token.getLength(); ++i) {
|
||||
if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
|
||||
// TODO return whether it's before or after so we can binary search
|
||||
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
|
||||
resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH
|
||||
return;
|
||||
}
|
||||
|
||||
if (CollectionUtils.notEmpty(children)) {
|
||||
// TODO binary search the children
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
child.getNode(resultHolder, key, keyOffset, keyLength);
|
||||
if (resultHolder.isMatch()) {
|
||||
return;
|
||||
} else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
|
||||
// passed it, so it doesn't exist
|
||||
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
|
||||
return;
|
||||
}
|
||||
// key is still AFTER the current node, so continue searching
|
||||
}
|
||||
}
|
||||
|
||||
// checked all children (or there were no children), and didn't find it
|
||||
resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/****************** writing back to byte[]'s *************************/
|
||||
|
||||
public byte[] getNewByteArray() {
|
||||
byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
|
||||
fillInBytes(arrayToFill);
|
||||
return arrayToFill;
|
||||
}
|
||||
|
||||
public void fillInBytes(byte[] arrayToFill) {
|
||||
for (int i = 0; i < token.getLength(); ++i) {
|
||||
arrayToFill[tokenStartOffset + i] = token.get(i);
|
||||
}
|
||||
if (parent != null) {
|
||||
parent.fillInBytes(arrayToFill);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/************************** printing ***********************/
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String s = "";
|
||||
if (parent == null) {
|
||||
s += "R ";
|
||||
} else {
|
||||
s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
|
||||
}
|
||||
s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
|
||||
if (numOccurrences > 0) {
|
||||
s += "x" + numOccurrences;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
public String getPaddedTokenAndOccurrenceString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(getBnlIndicator(true));
|
||||
sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
|
||||
sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
|
||||
if (outputArrayOffset >= 0) {
|
||||
sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
|
||||
}
|
||||
sb.append(" ");
|
||||
for (int i = 0; i < tokenStartOffset; ++i) {
|
||||
sb.append(" ");
|
||||
}
|
||||
sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getBnlIndicator(boolean indent) {
|
||||
if (indent) {
|
||||
if (isNub()) {
|
||||
return " N ";
|
||||
}
|
||||
return isBranch() ? "B " : " L";
|
||||
}
|
||||
if (isNub()) {
|
||||
return "N";
|
||||
}
|
||||
return isBranch() ? "B" : "L";
|
||||
}
|
||||
|
||||
|
||||
/********************** count different node types ********************/
|
||||
|
||||
public int getNumBranchNodesIncludingThisNode() {
|
||||
if (isLeaf()) {
|
||||
return 0;
|
||||
}
|
||||
int totalFromThisPlusChildren = isBranch() ? 1 : 0;
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
|
||||
}
|
||||
return totalFromThisPlusChildren;
|
||||
}
|
||||
|
||||
public int getNumNubNodesIncludingThisNode() {
|
||||
if (isLeaf()) {
|
||||
return 0;
|
||||
}
|
||||
int totalFromThisPlusChildren = isNub() ? 1 : 0;
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
|
||||
}
|
||||
return totalFromThisPlusChildren;
|
||||
}
|
||||
|
||||
public int getNumLeafNodesIncludingThisNode() {
|
||||
if (isLeaf()) {
|
||||
return 1;
|
||||
}
|
||||
int totalFromChildren = 0;
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
TokenizerNode child = children.get(i);
|
||||
totalFromChildren += child.getNumLeafNodesIncludingThisNode();
|
||||
}
|
||||
return totalFromChildren;
|
||||
}
|
||||
|
||||
|
||||
/*********************** simple read-only methods *******************************/
|
||||
|
||||
public int getNodeDepth() {
|
||||
return nodeDepth;
|
||||
}
|
||||
|
||||
public int getTokenLength() {
|
||||
return token.getLength();
|
||||
}
|
||||
|
||||
public boolean hasOccurrences() {
|
||||
return numOccurrences > 0;
|
||||
}
|
||||
|
||||
public boolean isRoot() {
|
||||
return this.parent == null;
|
||||
}
|
||||
|
||||
public int getNumChildren() {
|
||||
return CollectionUtils.nullSafeSize(children);
|
||||
}
|
||||
|
||||
public TokenizerNode getLastChild() {
|
||||
if (CollectionUtils.isEmpty(children)) {
|
||||
return null;
|
||||
}
|
||||
return CollectionUtils.getLast(children);
|
||||
}
|
||||
|
||||
public boolean isLeaf() {
|
||||
return CollectionUtils.isEmpty(children) && hasOccurrences();
|
||||
}
|
||||
|
||||
public boolean isBranch() {
|
||||
return CollectionUtils.notEmpty(children) && !hasOccurrences();
|
||||
}
|
||||
|
||||
public boolean isNub() {
|
||||
return CollectionUtils.notEmpty(children) && hasOccurrences();
|
||||
}
|
||||
|
||||
|
||||
/********************** simple mutation methods *************************/
|
||||
|
||||
/**
|
||||
* Each occurrence > 1 indicates a repeat of the previous entry.
|
||||
* This can be called directly by
|
||||
* an external class without going through the process of detecting a repeat if it is a known
|
||||
* repeat by some external mechanism. PtEncoder uses this when adding cells to a row if it knows
|
||||
* the new cells are part of the current row.
|
||||
* @param d increment by this amount
|
||||
*/
|
||||
public void incrementNumOccurrences(int d) {
|
||||
numOccurrences += d;
|
||||
}
|
||||
|
||||
|
||||
/************************* autogenerated get/set ******************/
|
||||
|
||||
public int getTokenOffset() {
|
||||
return tokenStartOffset;
|
||||
}
|
||||
|
||||
public TokenizerNode getParent() {
|
||||
return parent;
|
||||
}
|
||||
|
||||
public ByteRange getToken() {
|
||||
return token;
|
||||
}
|
||||
|
||||
public int getNumOccurrences() {
|
||||
return numOccurrences;
|
||||
}
|
||||
|
||||
public void setParent(TokenizerNode parent) {
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
public void setNumOccurrences(int numOccurrences) {
|
||||
this.numOccurrences = numOccurrences;
|
||||
}
|
||||
|
||||
public ArrayList<TokenizerNode> getChildren() {
|
||||
return children;
|
||||
}
|
||||
|
||||
public long getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public int getFirstInsertionIndex() {
|
||||
return firstInsertionIndex;
|
||||
}
|
||||
|
||||
public void setFirstInsertionIndex(int firstInsertionIndex) {
|
||||
this.firstInsertionIndex = firstInsertionIndex;
|
||||
}
|
||||
|
||||
public int getNegativeIndex() {
|
||||
return negativeIndex;
|
||||
}
|
||||
|
||||
public void setNegativeIndex(int negativeIndex) {
|
||||
this.negativeIndex = negativeIndex;
|
||||
}
|
||||
|
||||
public int getOutputArrayOffset() {
|
||||
return outputArrayOffset;
|
||||
}
|
||||
|
||||
public void setOutputArrayOffset(int outputArrayOffset) {
|
||||
this.outputArrayOffset = outputArrayOffset;
|
||||
}
|
||||
|
||||
public void setId(long id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public void setBuilder(Tokenizer builder) {
|
||||
this.builder = builder;
|
||||
}
|
||||
|
||||
public void setTokenOffset(int tokenOffset) {
|
||||
this.tokenStartOffset = tokenOffset;
|
||||
}
|
||||
|
||||
public void setToken(ByteRange token) {
|
||||
this.token = token;
|
||||
}
|
||||
|
||||
}
|
|
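For reference, the branch/leaf/nub classification and the three addSorted() cases above can be reproduced with plain JDK code. The sketch below uses hypothetical names and String tokens instead of ByteRange (it is not the removed Tokenizer/TokenizerNode API); running it prints the trie from the Javadoc example.

// A minimal, self-contained sketch of the branch/leaf/nub trie described above.
// Assumes only the JDK; all names here are hypothetical.
import java.util.ArrayList;
import java.util.List;

public class TrieNodeTypeDemo {

  static class Node {
    String token = "";            // bytes owned by this node (String for readability)
    int numOccurrences = 0;       // 0 for branches, >= 1 for leaves and nubs
    List<Node> children = new ArrayList<>();

    String type() {
      if (children.isEmpty()) return "leaf";
      return numOccurrences == 0 ? "branch" : "nub";
    }
  }

  // Insert a sorted input under 'node'; mirrors the three cases in addSorted():
  // 1) exact repeat, 2) leaf -> nub with a new child, 3) split into branch + two leaves.
  static void insert(Node node, String rest) {
    int common = commonPrefix(node.token, rest);
    if (common == node.token.length()) {
      String tail = rest.substring(common);
      if (tail.isEmpty()) {                        // case 1: identical input
        node.numOccurrences++;
        return;
      }
      if (!node.children.isEmpty()) {              // sorted input only ever extends the last child
        Node last = node.children.get(node.children.size() - 1);
        if (commonPrefix(last.token, tail) > 0) {
          insert(last, tail);
          return;
        }
      }
      Node child = new Node();                     // case 2: append a new child leaf
      child.token = tail;
      child.numOccurrences = 1;
      node.children.add(child);
    } else {                                       // case 3: split this node in place
      Node retained = new Node();
      retained.token = node.token.substring(common);
      retained.numOccurrences = node.numOccurrences;
      retained.children.addAll(node.children);
      node.token = node.token.substring(0, common);
      node.numOccurrences = 0;                     // this node is now a branch
      node.children.clear();
      node.children.add(retained);
      insert(node, rest);                          // re-insert; now hits case 2
    }
  }

  static int commonPrefix(String a, String b) {
    int i = 0;
    while (i < a.length() && i < b.length() && a.charAt(i) == b.charAt(i)) i++;
    return i;
  }

  static void print(Node n, String indent) {
    System.out.println(indent + "'" + n.token + "' " + n.type() + " x" + n.numOccurrences);
    for (Node c : n.children) print(c, indent + "  ");
  }

  public static void main(String[] args) {
    Node root = new Node();
    for (String s : new String[] { "AAA", "AAA", "AAB", "AAB", "AAB", "AABQQ", "AABQQ" }) {
      insert(root, s);
    }
    print(root, "");
    // Expected shape (matching the Javadoc): AA branch x0 -> A leaf x2, B nub x3 -> QQ leaf x2
  }
}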
@ -1,38 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;

import org.apache.yetus.audience.InterfaceAudience;


/**
 * Warning: currently unused, but code is valid. Pending performance testing on more data sets.
 *
 * Indicates where the key is relative to our current position in the tree. For example, the
 * current tree node is "BEFORE" the key we are seeking.
 */
@InterfaceAudience.Private
public enum TokenizerRowSearchPosition {

  AFTER,// the key is after this tree node, so keep searching
  BEFORE,// in a binary search, this tells us to back up
  MATCH,// the current node is a full match
  NO_MATCH,// might as well return a value more informative than null

}
@ -1,73 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;

import org.apache.yetus.audience.InterfaceAudience;


/**
 * for recursively searching a PtBuilder
 */
@InterfaceAudience.Private
public class TokenizerRowSearchResult {

  /************ fields ************************/

  protected TokenizerRowSearchPosition difference;
  protected TokenizerNode matchingNode;


  /*************** construct *****************/

  public TokenizerRowSearchResult() {
  }

  public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) {
    this.difference = difference;
  }

  public TokenizerRowSearchResult(TokenizerNode matchingNode) {
    this.difference = TokenizerRowSearchPosition.MATCH;
    this.matchingNode = matchingNode;
  }


  /*************** methods **********************/

  public boolean isMatch() {
    return TokenizerRowSearchPosition.MATCH == difference;
  }


  /************* get/set ***************************/

  public TokenizerRowSearchPosition getDifference() {
    return difference;
  }

  public TokenizerNode getMatchingNode() {
    return matchingNode;
  }

  public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) {
    this.difference = difference;
    this.matchingNode = matchingNode;
  }

}
@ -1,67 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.scanner;

import org.apache.yetus.audience.InterfaceAudience;

/**
 * An indicator of the state of the scanner after an operation such as nextCell() or
 * positionAt(..). For example:
 * <ul>
 * <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
 * it should load the next block.</li>
 * <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.
 * </li>
 * <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
 * next region.</li>
 * </ul>
 */
@InterfaceAudience.Private
public enum CellScannerPosition {

  /**
   * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first
   * cell.
   */
  BEFORE_FIRST,

  /**
   * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
   * rather it is the nearest cell before the requested cell.
   */
  BEFORE,

  /**
   * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by
   * positionAt(..).
   */
  AT,

  /**
   * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
   * rather it is the nearest cell after the requested cell.
   */
  AFTER,

  /**
   * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect.
   */
  AFTER_LAST

}
@ -1,118 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.scanner;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;

/**
 * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that
 * the implementation is able to navigate between cells without iterating through every cell.
 */
@InterfaceAudience.Private
public interface CellSearcher extends ReversibleCellScanner {
  /**
   * Reset any state in the scanner so it appears it was freshly opened.
   */
  void resetToBeforeFirstEntry();

  /**
   * <p>
   * Do everything within this scanner's power to find the key. Look forward and backwards.
   * </p>
   * <p>
   * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state.
   * </p>
   * @param key position the CellScanner exactly on this key
   * @return true if the cell existed and getCurrentCell() holds a valid cell
   */
  boolean positionAt(Cell key);

  /**
   * <p>
   * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no
   * exact match.
   * </p>
   * @param key position the CellScanner on this key or the closest cell before
   * @return AT if exact match<br>
   *         BEFORE if on last cell before key<br>
   *         BEFORE_FIRST if key was before the first cell in this scanner's scope
   */
  CellScannerPosition positionAtOrBefore(Cell key);

  /**
   * <p>
   * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact
   * match.
   * </p>
   * @param key position the CellScanner on this key or the closest cell after
   * @return AT if exact match<br>
   *         AFTER if on first cell after key<br>
   *         AFTER_LAST if key was after the last cell in this scanner's scope
   */
  CellScannerPosition positionAtOrAfter(Cell key);

  /**
   * <p>
   * Note: Added for backwards compatibility with
   * org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek(Cell)
   * </p><p>
   * Look for the key, but only look after the current position. Probably not needed for an
   * efficient tree implementation, but is important for implementations without random access such
   * as unencoded KeyValue blocks.
   * </p>
   * @param key position the CellScanner exactly on this key
   * @return true if getCurrent() holds a valid cell
   */
  boolean seekForwardTo(Cell key);

  /**
   * <p>
   * Same as seekForwardTo(..), but go to the extra effort of finding the previous key if there's
   * no exact match.
   * </p>
   * @param key position the CellScanner on this key or the closest cell before
   * @return AT if exact match<br>
   *         BEFORE if on last cell before key<br>
   *         BEFORE_FIRST if key was before the first cell in this scanner's scope
   */
  CellScannerPosition seekForwardToOrBefore(Cell key);

  /**
   * <p>
   * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no
   * exact match.
   * </p>
   * @param key position the CellScanner on this key or the closest cell after
   * @return AT if exact match<br>
   *         AFTER if on first cell after key<br>
   *         AFTER_LAST if key was after the last cell in this scanner's scope
   */
  CellScannerPosition seekForwardToOrAfter(Cell key);

  /**
   * <p>
   * Note: This may not be appropriate to have in the interface. Need to investigate.
   * </p>
   * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST.
   * This is used by tests and for handling certain edge cases.
   */
  void positionAfterLastCell();

}
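The tri-state positioning contract above behaves much like the JDK NavigableMap floor/ceiling lookups. A hedged analogy with hypothetical names, assuming only the JDK (this is not the removed interface or its implementation):

// Illustrates positionAtOrBefore/positionAtOrAfter semantics using TreeMap floor/ceiling.
import java.util.TreeMap;

public class PositionSemanticsDemo {

  enum Position { BEFORE_FIRST, BEFORE, AT, AFTER, AFTER_LAST }

  // Analogue of positionAtOrBefore(key): exact hit -> AT, nearest smaller -> BEFORE,
  // nothing smaller exists -> BEFORE_FIRST.
  static Position atOrBefore(TreeMap<String, String> cells, String key) {
    String found = cells.floorKey(key);
    if (found == null) return Position.BEFORE_FIRST;
    return found.equals(key) ? Position.AT : Position.BEFORE;
  }

  // Analogue of positionAtOrAfter(key): exact hit -> AT, nearest larger -> AFTER,
  // nothing larger exists -> AFTER_LAST.
  static Position atOrAfter(TreeMap<String, String> cells, String key) {
    String found = cells.ceilingKey(key);
    if (found == null) return Position.AFTER_LAST;
    return found.equals(key) ? Position.AT : Position.AFTER;
  }

  public static void main(String[] args) {
    TreeMap<String, String> cells = new TreeMap<>();
    cells.put("bbb", "v1");
    cells.put("ddd", "v2");
    System.out.println(atOrBefore(cells, "ddd")); // AT
    System.out.println(atOrBefore(cells, "ccc")); // BEFORE (lands on bbb)
    System.out.println(atOrBefore(cells, "aaa")); // BEFORE_FIRST
    System.out.println(atOrAfter(cells, "ccc"));  // AFTER (lands on ddd)
    System.out.println(atOrAfter(cells, "eee"));  // AFTER_LAST
  }
}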
@ -1,55 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.scanner;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.CellScanner;

/**
 * An extension of CellScanner indicating the scanner supports iterating backwards through cells.
 * <p>
 * Note: This was not added to suggest that HBase should support client facing reverse Scanners,
 * but because some {@link CellSearcher} implementations, namely PrefixTree, need a method of
 * backing up if the positionAt(..) method goes past the requested cell.
 */
@InterfaceAudience.Private
public interface ReversibleCellScanner extends CellScanner {

  /**
   * Try to position the scanner one Cell before the current position.
   * @return true if the operation was successful, meaning getCurrentCell() will return a valid
   *         Cell.<br>
   *         false if there were no previous cells, meaning getCurrentCell() will return null.
   *         Scanner position will be
   *         {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
   */
  boolean previous();

  /**
   * Try to position the scanner in the row before the current row.
   * @param endOfRow true for the last cell in the previous row; false for the first cell
   * @return true if the operation was successful, meaning getCurrentCell() will return a valid
   *         Cell.<br>
   *         false if there were no previous cells, meaning getCurrentCell() will return null.
   *         Scanner position will be
   *         {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
   */
  boolean previousRow(boolean endOfRow);
}
@ -1,181 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.byterange;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/**
 * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
 * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
 * <p>
 * Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet}
 * and {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might
 * be a trie-oriented ByteRangeTrieSet, etc.
 */
@InterfaceAudience.Private
public abstract class ByteRangeSet {

  /******************** fields **********************/

  protected byte[] byteAppender;
  protected int numBytes;

  protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;

  protected ArrayList<ByteRange> uniqueRanges;
  protected int numUniqueRanges = 0;

  protected int[] uniqueRangeIndexByInsertionId;
  protected int numInputs;

  protected List<Integer> sortedIndexByUniqueIndex;
  protected int[] sortedIndexByInsertionId;
  protected ArrayList<ByteRange> sortedRanges;


  /****************** construct **********************/

  protected ByteRangeSet() {
    this.byteAppender = new byte[0];
    this.uniqueRanges = Lists.newArrayList();
    this.uniqueRangeIndexByInsertionId = new int[0];
    this.sortedIndexByUniqueIndex = Lists.newArrayList();
    this.sortedIndexByInsertionId = new int[0];
    this.sortedRanges = Lists.newArrayList();
  }

  public void reset() {
    numBytes = 0;
    uniqueIndexByUniqueRange.clear();
    numUniqueRanges = 0;
    numInputs = 0;
    sortedIndexByUniqueIndex.clear();
    sortedRanges.clear();
  }


  /*************** abstract *************************/

  public abstract void addToSortedRanges();


  /**************** methods *************************/

  /**
   * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and
   * insert it into the tracking Map uniqueIndexByUniqueRange.
   */
  public void add(ByteRange bytes) {
    Integer index = uniqueIndexByUniqueRange.get(bytes);
    if (index == null) {
      index = store(bytes);
    }
    int minLength = numInputs + 1;
    uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
      minLength, 2 * minLength);
    uniqueRangeIndexByInsertionId[numInputs] = index;
    ++numInputs;
  }

  protected int store(ByteRange bytes) {
    int indexOfNewElement = numUniqueRanges;
    if (uniqueRanges.size() <= numUniqueRanges) {
      uniqueRanges.add(new SimpleMutableByteRange());
    }
    ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
    int neededBytes = numBytes + bytes.getLength();
    byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
    bytes.deepCopyTo(byteAppender, numBytes);
    storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
    numBytes += bytes.getLength();
    uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
    int newestUniqueIndex = numUniqueRanges;
    ++numUniqueRanges;
    return newestUniqueIndex;
  }

  public ByteRangeSet compile() {
    addToSortedRanges();
    for (int i = 0; i < sortedRanges.size(); ++i) {
      sortedIndexByUniqueIndex.add(null);// need to grow the size
    }
    // TODO move this to an invert(int[]) util method
    for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
      int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
      sortedIndexByUniqueIndex.set(uniqueIndex, i);
    }
    sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
      numInputs);
    for (int i = 0; i < numInputs; ++i) {
      int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
      int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
      sortedIndexByInsertionId[i] = sortedIndex;
    }
    return this;
  }

  public int getSortedIndexForInsertionId(int insertionId) {
    return sortedIndexByInsertionId[insertionId];
  }

  public int size() {
    return uniqueIndexByUniqueRange.size();
  }


  /***************** standard methods ************************/

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    int i = 0;
    for (ByteRange r : sortedRanges) {
      if (i > 0) {
        sb.append("\n");
      }
      sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
      ++i;
    }
    sb.append("\ntotalSize:" + numBytes);
    sb.append("\navgSize:" + getAvgSize());
    return sb.toString();
  }


  /**************** get/set *****************************/

  public ArrayList<ByteRange> getSortedRanges() {
    return sortedRanges;
  }

  public long getAvgSize() {
    return numBytes / numUniqueRanges;
  }

}
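A simplified model of the insertionId -> uniqueIndex -> sortedIndex bookkeeping that add()/store()/compile() maintain above, written against plain JDK collections with hypothetical names. Unlike the removed class, it makes no attempt to avoid garbage or to append bytes into one backing array:

// Demonstrates de-duping inputs in arrival order, then resolving each original
// insertion id to the value's position in sorted order.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

public class DedupIndexDemo {
  public static void main(String[] args) {
    String[] inputs = { "row2", "row1", "row2", "row3" };   // arrive unsorted, with dupes

    // de-dupe in arrival order, remembering each input's unique id (like add()/store())
    Map<String, Integer> uniqueIdByValue = new LinkedHashMap<>();
    int[] uniqueIdByInsertionId = new int[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
      uniqueIdByInsertionId[i] =
        uniqueIdByValue.computeIfAbsent(inputs[i], v -> uniqueIdByValue.size());
    }

    // sort the unique values and invert to get uniqueId -> sortedIndex (like compile())
    List<String> sorted = new ArrayList<>(new TreeSet<>(uniqueIdByValue.keySet()));
    int[] sortedIndexByUniqueId = new int[sorted.size()];
    for (int s = 0; s < sorted.size(); s++) {
      sortedIndexByUniqueId[uniqueIdByValue.get(sorted.get(s))] = s;
    }

    // now every original insertion id resolves to its position in sorted order
    for (int i = 0; i < inputs.length; i++) {
      System.out.println(inputs[i] + " -> sortedIndex "
        + sortedIndexByUniqueId[uniqueIdByInsertionId[i]]);
    }
    // prints: row2 -> 1, row1 -> 0, row2 -> 1, row3 -> 2
  }
}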
@ -1,57 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.byterange.impl;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;

/**
 * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap
 * produces garbage when adding a new element to it. We can probably create a tighter
 * implementation without pointers or garbage.
 */
@InterfaceAudience.Private
public class ByteRangeHashSet extends ByteRangeSet {

  /************************ constructors *****************************/

  public ByteRangeHashSet() {
    this.uniqueIndexByUniqueRange = new HashMap<>();
  }

  public ByteRangeHashSet(List<ByteRange> rawByteArrays) {
    this();// needed to initialize the HashMap before add() dereferences it
    for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
      add(in);
    }
  }

  @Override
  public void addToSortedRanges() {
    sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
    Collections.sort(sortedRanges);
  }

}
@ -1,54 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.byterange.impl;

import java.util.List;
import java.util.TreeMap;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;

/**
 * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet.
 */
@InterfaceAudience.Private
public class ByteRangeTreeSet extends ByteRangeSet {

  /************************ constructors *****************************/

  public ByteRangeTreeSet() {
    this.uniqueIndexByUniqueRange = new TreeMap<>();
  }

  public ByteRangeTreeSet(List<ByteRange> rawByteArrays) {
    this();// needed to initialize the TreeMap
    for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
      add(in);
    }
  }

  @Override
  public void addToSortedRanges() {
    sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
  }

}
@ -1,117 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.vint;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;

/**
 * UFInt is an abbreviation for Unsigned Fixed-width Integer.
 *
 * This class converts between positive ints and 1-4 bytes that represent the int. All input ints
 * must be positive. Max values stored in N bytes are:
 *
 * N=1: 2^8 - 1  => 255
 * N=2: 2^16 - 1 => 65,535
 * N=3: 2^24 - 1 => 16,777,215
 * N=4: Integer.MAX_VALUE => 2,147,483,647 (inputs are positive ints)
 *
 * This was created to get most of the memory savings of a variable length integer when encoding
 * an array of input integers, but to fix the number of bytes for each integer to the number needed
 * to store the maximum integer in the array. This enables a binary search to be performed on the
 * array of encoded integers.
 *
 * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if
 * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
 * numbers will also require 2 bytes.
 *
 * warnings:
 *  * no input validation for max performance
 *  * no negatives
 */
@InterfaceAudience.Private
public class UFIntTool {

  private static final int NUM_BITS_IN_LONG = 64;

  public static long maxValueForNumBytes(int numBytes) {
    return (1L << (numBytes * 8)) - 1;
  }

  public static int numBytes(final long value) {
    if (value == 0) {// 0 doesn't work with the formula below
      return 1;
    }
    return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8;
  }

  public static byte[] getBytes(int outputWidth, final long value) {
    byte[] bytes = new byte[outputWidth];
    writeBytes(outputWidth, value, bytes, 0);
    return bytes;
  }

  public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
    bytes[offset + outputWidth - 1] = (byte) value;
    for (int i = outputWidth - 2; i >= 0; --i) {
      bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8);
    }
  }

  private static final long[] MASKS = new long[] {
    (long) 255,
    (long) 255 << 8,
    (long) 255 << 16,
    (long) 255 << 24,
    (long) 255 << 32,
    (long) 255 << 40,
    (long) 255 << 48,
    (long) 255 << 56
  };

  public static void writeBytes(int outputWidth, final long value, OutputStream os)
      throws IOException {
    for (int i = outputWidth - 1; i >= 0; --i) {
      os.write((byte) ((value & MASKS[i]) >>> (8 * i)));
    }
  }

  public static long fromBytes(final byte[] bytes) {
    long value = 0;
    value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int
    for (int i = 1; i < bytes.length; ++i) {
      value <<= 8;
      value |= bytes[i] & 0xff;
    }
    return value;
  }

  public static long fromBytes(final ByteBuff buf, final int offset, final int width) {
    long value = 0;
    value |= buf.get(offset + 0) & 0xff;// these seem to do ok without casting the byte to int
    for (int i = 1; i < width; ++i) {
      value <<= 8;
      value |= buf.get(i + offset) & 0xff;
    }
    return value;
  }

}
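A small round-trip check of the fixed-width idea described in the Javadoc above, assuming only the JDK (hypothetical names, not the removed UFIntTool API). The width is chosen once from the array's maximum, so every element occupies the same stride, which is what makes binary search over the encoded array possible:

// Encodes an array of offsets at a single fixed width and reads them back.
public class FixedWidthIntDemo {

  static int widthFor(long maxValue) {            // bytes needed for the largest value
    int w = 1;
    while (maxValue > (1L << (8 * w)) - 1) w++;
    return w;
  }

  static void put(byte[] dst, int offset, int width, long value) {  // big-endian, unsigned
    for (int i = width - 1; i >= 0; i--) {
      dst[offset + i] = (byte) value;
      value >>>= 8;
    }
  }

  static long get(byte[] src, int offset, int width) {
    long v = 0;
    for (int i = 0; i < width; i++) {
      v = (v << 8) | (src[offset + i] & 0xff);
    }
    return v;
  }

  public static void main(String[] args) {
    long[] offsets = { 3, 500, 70_000 };          // block offsets; the max needs 3 bytes
    int width = widthFor(70_000);                 // -> 3
    byte[] encoded = new byte[width * offsets.length];
    for (int i = 0; i < offsets.length; i++) {
      put(encoded, i * width, width, offsets[i]); // element i starts at i * width,
    }                                             // a constant stride suited to binary search
    for (int i = 0; i < offsets.length; i++) {
      System.out.println(get(encoded, i * width, width)); // 3, 500, 70000
    }
  }
}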
@ -1,112 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.vint;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;

/**
 * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left
 * bit of the current byte is 1, then there is at least one more byte.
 */
@InterfaceAudience.Private
public class UVIntTool {

  public static final byte
    BYTE_7_RIGHT_BITS_SET = 127,
    BYTE_LEFT_BIT_SET = -128;

  public static final long
    INT_7_RIGHT_BITS_SET = 127,
    INT_8TH_BIT_SET = 128;

  public static final byte[]
    MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 };

  /********************* int -> bytes **************************/

  public static int numBytes(int in) {
    if (in == 0) {
      // doesn't work with the formula below
      return 1;
    }
    return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1)
  }

  public static byte[] getBytes(int value) {
    int numBytes = numBytes(value);
    byte[] bytes = new byte[numBytes];
    int remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET);
      remainder >>= 7;
    }
    // do not set the left bit
    bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET);
    return bytes;
  }

  public static int writeBytes(int value, OutputStream os) throws IOException {
    int numBytes = numBytes(value);
    int remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET));
      remainder >>= 7;
    }
    // do not set the left bit
    os.write((byte) (remainder & INT_7_RIGHT_BITS_SET));
    return numBytes;
  }

  /******************** bytes -> int **************************/

  public static int getInt(ByteBuff buffer, int offset) {
    int value = 0;
    for (int i = 0;; ++i) {
      byte b = buffer.get(offset + i);
      int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      if (b >= 0) {
        break;
      }
    }
    return value;
  }

  public static int getInt(InputStream is) throws IOException {
    int value = 0;
    int i = 0;
    int b;
    do {
      b = is.read();
      int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      ++i;
    } while (b > Byte.MAX_VALUE);
    return value;
  }
}
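UVIntTool above and UVLongTool below share the same 7-bits-per-byte layout: a set high bit means another byte follows, and the least-significant 7-bit group is written first. A self-contained round trip under those assumptions, with hypothetical names:

// Writes and reads varints in the high-bit-continuation format described above.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

public class VarIntDemo {

  static void write(long value, ByteArrayOutputStream out) {
    while (value > 127) {                  // low 7 bits, continuation bit set
      out.write((int) ((value & 0x7f) | 0x80));
      value >>>= 7;
    }
    out.write((int) value);                // final byte: high bit clear
  }

  static long read(ByteArrayInputStream in) throws IOException {
    long value = 0;
    int shift = 0;
    int b;
    do {
      b = in.read();
      value |= (long) (b & 0x7f) << shift; // strip continuation bit, splice 7 bits in
      shift += 7;
    } while ((b & 0x80) != 0);             // high bit set -> keep reading
    return value;
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    long[] samples = { 0, 127, 128, 16_383, 16_384, Integer.MAX_VALUE };
    for (long v : samples) write(v, out);  // 0 and 127 take 1 byte, 128 takes 2, ...
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    for (long v : samples) {
      System.out.println(v + " -> " + read(in)); // each value round-trips
    }
  }
}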
@@ -1,116 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.vint;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;

/**
 * Simple Variable Length Integer encoding. A left bit of 0 means we are on the last byte. If the
 * left bit of the current byte is 1, then there is at least one more byte.
 */
@InterfaceAudience.Private
public class UVLongTool {

  public static final byte
    BYTE_7_RIGHT_BITS_SET = 127,
    BYTE_LEFT_BIT_SET = -128;

  public static final long
    LONG_7_RIGHT_BITS_SET = 127,
    LONG_8TH_BIT_SET = 128;

  public static final byte[]
    MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 };

  /********************* long -> bytes **************************/

  public static int numBytes(long in) {// do a check for illegal arguments if not protected
    if (in == 0) {
      return 1;
    }// doesn't work with the formula below
    return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1)
  }

  public static byte[] getBytes(long value) {
    int numBytes = numBytes(value);
    byte[] bytes = new byte[numBytes];
    long remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit
      remainder >>= 7;
    }
    bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit
    return bytes;
  }

  public static int writeBytes(long value, OutputStream os) throws IOException {
    int numBytes = numBytes(value);
    long remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      // set the left bit
      os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET));
      remainder >>= 7;
    }
    // do not set the left bit
    os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET));
    return numBytes;
  }

  /******************** bytes -> long **************************/

  public static long getLong(byte[] bytes) {
    return getLong(new SingleByteBuff(ByteBuffer.wrap(bytes)), 0);
  }

  public static long getLong(ByteBuff buf, int offset) {
    long value = 0;
    for (int i = 0;; ++i) {
      byte b = buf.get(offset + i);
      long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      if (b >= 0) {
        break;
      }// first bit was 0, so that's the last byte in the VarLong
    }
    return value;
  }

  public static long getLong(InputStream is) throws IOException {
    long value = 0;
    int i = 0;
    int b;
    do {
      b = is.read();
      long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
      shifted <<= 7 * i;
      value |= shifted;
      ++i;
    } while (b > Byte.MAX_VALUE);
    return value;
  }
}
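For readers tracing what this commit removes: UVIntTool and UVLongTool implement the same 7-bit little-endian varint scheme described in the class comment above (low 7 bits per byte, least-significant group first, high bit set when more bytes follow). The standalone sketch below is a hypothetical illustration, not part of the codebase; it walks the value 300 through the same bit operations to show how the byte's sign bit doubles as the continuation flag, and why numBytes(300) = (70 - 55) / 7 = 2.

// Hypothetical standalone demo of the removed varint scheme; the class name
// and main method are illustrative only.
public class VarLongDemo {
  public static void main(String[] args) {
    long value = 300; // binary 1 0010 1100: nine significant bits
    // (70 - numberOfLeadingZeros) / 7 = (70 - 55) / 7 = 2 bytes needed
    int numBytes = (70 - Long.numberOfLeadingZeros(value)) / 7;
    byte[] bytes = new byte[numBytes];
    long remainder = value;
    for (int i = 0; i < numBytes - 1; ++i) {
      bytes[i] = (byte) ((remainder & 127) | 128); // 0xAC: low 7 bits (44) plus continuation bit
      remainder >>= 7;
    }
    bytes[numBytes - 1] = (byte) (remainder & 127); // 0x02: high bit clear marks the last byte
    long decoded = 0;
    for (int i = 0;; ++i) {
      byte b = bytes[i];
      decoded |= (long) (127 & b) << (7 * i); // strip the flag bit, shift into place
      if (b >= 0) { // sign bit clear == continuation bit clear == last byte
        break;
      }
    }
    System.out.println(decoded); // prints 300
  }
}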
@@ -1,65 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.keyvalue;

import java.nio.ByteBuffer;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.row.TestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import static org.junit.Assume.assumeFalse;

@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestKeyValueTool {

  @Parameters
  public static Collection<Object[]> parameters() {
    return TestRowData.InMemory.getAllAsObjectArray();
  }

  @Parameterized.Parameter
  public TestRowData rows;

  @Test
  public void testRoundTripToBytes() {
    assumeFalse(rows instanceof TestRowDataTrivialWithTags);
    assumeFalse(rows instanceof TestRowDataRandomKeyValuesWithTags);

    List<KeyValue> kvs = rows.getInputs();
    ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false);
    List<KeyValue> roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false, false);
    Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray());
  }
}
@@ -1,27 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree;

import org.apache.hadoop.hbase.util.Bytes;

public class PrefixTreeTestConstants {

  public static final byte[] TEST_CF = Bytes.toBytes("cfDefault");

}
@@ -1,91 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.blockmeta;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category({MiscTests.class, SmallTests.class})
public class TestBlockMeta {

  static int BLOCK_START = 123;

  private static PrefixTreeBlockMeta createSample() {
    PrefixTreeBlockMeta m = new PrefixTreeBlockMeta();
    m.setNumMetaBytes(0);
    m.setNumKeyValueBytes(3195);

    m.setNumRowBytes(0);
    m.setNumFamilyBytes(3);
    m.setNumQualifierBytes(12345);
    m.setNumTagsBytes(50);
    m.setNumTimestampBytes(23456);
    m.setNumMvccVersionBytes(5);
    m.setNumValueBytes(34567);

    m.setNextNodeOffsetWidth(3);
    m.setFamilyOffsetWidth(1);
    m.setQualifierOffsetWidth(2);
    m.setTagsOffsetWidth(2);
    m.setTimestampIndexWidth(1);
    m.setMvccVersionIndexWidth(2);
    m.setValueOffsetWidth(8);
    m.setValueLengthWidth(3);

    m.setRowTreeDepth(11);
    m.setMaxRowLength(200);
    m.setMaxQualifierLength(50);
    m.setMaxTagsLength(40);

    m.setMinTimestamp(1318966363481L);
    m.setTimestampDeltaWidth(3);
    m.setMinMvccVersion(100L);
    m.setMvccVersionDeltaWidth(4);

    m.setAllSameType(false);
    m.setAllTypes(KeyValue.Type.Delete.getCode());

    m.setNumUniqueRows(88);
    m.setNumUniqueFamilies(1);
    m.setNumUniqueQualifiers(56);
    m.setNumUniqueTags(5);
    return m;
  }

  @Test
  public void testStreamSerialization() throws IOException {
    PrefixTreeBlockMeta original = createSample();
    ByteArrayOutputStream os = new ByteArrayOutputStream(10000);
    original.writeVariableBytesToOutputStream(os);
    ByteBuffer buffer = ByteBuffer.wrap(os.toByteArray());
    PrefixTreeBlockMeta roundTripped = new PrefixTreeBlockMeta(new SingleByteBuff(buffer));
    Assert.assertTrue(original.equals(roundTripped));
  }

}
@@ -1,78 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.builder;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestTokenizer {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestTokenizerData.InMemory().getAllAsObjectArray();
  }

  private List<byte[]> inputs;
  private Tokenizer builder;
  private List<byte[]> roundTripped;

  public TestTokenizer(TestTokenizerData sortedByteArrays) {
    this.inputs = sortedByteArrays.getInputs();
    this.builder = new Tokenizer();
    for (byte[] array : inputs) {
      builder.addSorted(new SimpleMutableByteRange(array));
    }
    this.roundTripped = builder.getArrays();
  }

  @Test
  public void testReaderRoundTrip() {
    Assert.assertEquals(inputs.size(), roundTripped.size());
    Assert.assertTrue(Bytes.isSorted(roundTripped));
    Assert.assertTrue(Bytes.equals(inputs, roundTripped));
  }

  @Test
  public void testSearching() {
    for (byte[] input : inputs) {
      TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult();
      builder.getNode(resultHolder, input, 0, input.length);
      TokenizerNode n = resultHolder.getMatchingNode();
      byte[] output = n.getNewByteArray();
      Assert.assertTrue(Bytes.equals(input, output));
    }
  }

}
@@ -1,42 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.builder;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.builder.data.TestTokenizerDataBasic;
import org.apache.hadoop.hbase.codec.prefixtree.builder.data.TestTokenizerDataEdgeCase;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public interface TestTokenizerData {

  List<byte[]> getInputs();
  List<byte[]> getOutputs();

  class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      all.add(new Object[] { new TestTokenizerDataBasic() });
      all.add(new Object[] { new TestTokenizerDataEdgeCase() });
      return all;
    }
  }
}
@@ -1,90 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.builder;

import java.util.List;

import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

@Category({MiscTests.class, SmallTests.class})
public class TestTreeDepth {

  @Test
  public void testSingleNode() {
    List<String> inputs = Lists.newArrayList("a");
    testInternal(inputs, 1);
  }

  @Test
  public void testSimpleBranch() {
    List<String> inputs = Lists.newArrayList("a", "aa", "ab");
    testInternal(inputs, 2);
  }

  @Test
  public void testEmptyRoot() {
    List<String> inputs = Lists.newArrayList("a", "b");
    testInternal(inputs, 2);
  }

  @Test
  public void testRootAsNub() {
    List<String> inputs = Lists.newArrayList("a", "aa");
    testInternal(inputs, 2);
  }

  @Test
  public void testRootAsNubPlusNub() {
    List<String> inputs = Lists.newArrayList("a", "aa", "aaa");
    testInternal(inputs, 3);
  }

  @Test
  public void testEmptyRootPlusNub() {
    List<String> inputs = Lists.newArrayList("a", "aa", "b");
    testInternal(inputs, 3);
  }

  @Test
  public void testSplitDistantAncestor() {
    List<String> inputs = Lists.newArrayList("a", "ac", "acd", "b");
    testInternal(inputs, 4);
  }

  protected void testInternal(List<String> inputs, int expectedTreeDepth) {
    Tokenizer builder = new Tokenizer();
    for (String s : inputs) {
      SimpleMutableByteRange b = new SimpleMutableByteRange(Bytes.toBytes(s));
      builder.addSorted(b);
    }
    Assert.assertEquals(1, builder.getRoot().getNodeDepth());
    Assert.assertEquals(expectedTreeDepth, builder.getTreeDepth());
  }

}
@@ -1,51 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.builder.data;

import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.builder.TestTokenizerData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestTokenizerDataBasic implements TestTokenizerData {

  static List<byte[]> d = Lists.newArrayList();
  static {
    List<String> s = Lists.newArrayList();
    s.add("abc");// nub
    s.add("abcde");// leaf
    s.add("bbc");// causes root to split and have empty token
    s.add("bbc");// makes numOccurrences=2 on the bbc node
    s.add("cd");// just to get another node after the numOccurrences=2
    d = Bytes.getUtf8ByteArrays(s);
  }

  @Override
  public List<byte[]> getInputs() {
    return d;
  }

  @Override
  public List<byte[]> getOutputs() {
    return d;
  }

}
@@ -1,53 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.builder.data;

import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.builder.TestTokenizerData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestTokenizerDataEdgeCase implements TestTokenizerData {

  static List<byte[]> d = Lists.newArrayList();
  static {
    /*
     * tricky little combination because the acegi token will partially match abdfi, but when you
     * descend into abdfi, it will not fully match
     */
    List<String> s = Lists.newArrayList();
    s.add("abdfh");
    s.add("abdfi");
    s.add("acegi");
    d = Bytes.getUtf8ByteArrays(s);
  }

  @Override
  public List<byte[]> getInputs() {
    return d;
  }

  @Override
  public List<byte[]> getOutputs() {
    return d;
  }

}
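The edge case captured by the inputs above is easier to see with the shared prefix lengths written out: "abdfh" and "abdfi" share the four-byte token "abdf", while "abdfi" and "acegi" share only the leading "a", so a search for "acegi" matches the root token but must not descend fully into the "bdf" branch. The snippet below is a hypothetical illustration only; it does not use the Tokenizer API, just plain string comparison over the same three inputs.

import java.util.Arrays;
import java.util.List;

// Hypothetical helper, not part of the prefix-tree code: prints the pairwise
// common-prefix lengths that produce the tricky trie shape described above.
public class EdgeCasePrefixes {
  static int commonPrefixLen(String a, String b) {
    int n = Math.min(a.length(), b.length());
    int i = 0;
    while (i < n && a.charAt(i) == b.charAt(i)) {
      ++i;
    }
    return i;
  }

  public static void main(String[] args) {
    List<String> inputs = Arrays.asList("abdfh", "abdfi", "acegi");
    for (int i = 1; i < inputs.size(); ++i) {
      String prev = inputs.get(i - 1);
      String cur = inputs.get(i);
      System.out.println(prev + " vs " + cur + ": " + commonPrefixLen(prev, cur));
    }
    // prints: abdfh vs abdfi: 4, then abdfi vs acegi: 1
  }
}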
@@ -1,127 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.column;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestColumnBuilder {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestColumnData.InMemory().getAllAsObjectArray();
  }

  /*********** fields **********************************/

  protected TestColumnData columns;
  protected ByteRangeTreeSet columnSorter;
  protected List<ByteRange> sortedUniqueColumns;
  protected PrefixTreeBlockMeta blockMeta;
  protected Tokenizer builder;
  protected ColumnSectionWriter writer;
  protected byte[] bytes;
  protected byte[] buffer;
  protected ColumnReader reader;

  /*************** construct ****************************/

  public TestColumnBuilder(TestColumnData columns) {
    this.columns = columns;
    List<ByteRange> inputs = columns.getInputs();
    this.columnSorter = new ByteRangeTreeSet(inputs);
    this.sortedUniqueColumns = columnSorter.compile().getSortedRanges();
    List<byte[]> copies = ByteRangeUtils.copyToNewArrays(sortedUniqueColumns);
    Assert.assertTrue(Bytes.isSorted(copies));
    this.blockMeta = new PrefixTreeBlockMeta();
    this.blockMeta.setNumMetaBytes(0);
    this.blockMeta.setNumRowBytes(0);
    this.builder = new Tokenizer();
  }

  /************* methods ********************************/

  @Test
  public void testReaderRoundTrip() throws IOException {
    for (int i = 0; i < sortedUniqueColumns.size(); ++i) {
      ByteRange column = sortedUniqueColumns.get(i);
      builder.addSorted(column);
    }
    List<byte[]> builderOutputArrays = builder.getArrays();
    for (int i = 0; i < builderOutputArrays.size(); ++i) {
      byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray();
      byte[] outputArray = builderOutputArrays.get(i);
      boolean same = Bytes.equals(inputArray, outputArray);
      Assert.assertTrue(same);
    }
    Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size());

    writer = new ColumnSectionWriter(blockMeta, builder, ColumnNodeType.QUALIFIER);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    writer.compile().writeBytes(baos);
    bytes = baos.toByteArray();
    buffer = new byte[blockMeta.getMaxQualifierLength()];
    reader = new ColumnReader(buffer, ColumnNodeType.QUALIFIER);
    reader.initOnBlock(blockMeta, new SingleByteBuff(ByteBuffer.wrap(bytes)));

    List<TokenizerNode> builderNodes = Lists.newArrayList();
    builder.appendNodes(builderNodes, true, true);
    int i = 0;
    for (TokenizerNode builderNode : builderNodes) {
      if (!builderNode.hasOccurrences()) {
        continue;
      }
      Assert.assertEquals(1, builderNode.getNumOccurrences());// we de-duped before adding to builder
      int position = builderNode.getOutputArrayOffset();
      byte[] output = reader.populateBuffer(position).copyBufferToNewArray();
      boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output);
      Assert.assertTrue(same);
      ++i;
    }
  }

}
@@ -1,45 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.column;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.column.data.TestColumnDataRandom;
import org.apache.hadoop.hbase.codec.prefixtree.column.data.TestColumnDataSimple;
import org.apache.hadoop.hbase.util.ByteRange;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public interface TestColumnData {

  List<ByteRange> getInputs();
  List<ByteRange> getOutputs();

  class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      all.add(new Object[] { new TestColumnDataSimple() });
      for (int leftShift = 0; leftShift < 16; ++leftShift) {
        all.add(new Object[] { new TestColumnDataRandom(1 << leftShift) });
      }
      return all;
    }
  }
}
@@ -1,63 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.column.data;

import java.util.List;

import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.column.TestColumnData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.apache.hadoop.hbase.util.RedundantKVGenerator;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestColumnDataRandom implements TestColumnData {

  private List<ByteRange> inputs = Lists.newArrayList();
  private List<ByteRange> outputs = Lists.newArrayList();

  public TestColumnDataRandom(int numColumns) {
    RedundantKVGenerator generator = new RedundantKVGenerator();
    ByteRangeSet sortedColumns = new ByteRangeTreeSet();
    List<KeyValue> d = generator.generateTestKeyValues(numColumns);
    for (KeyValue col : d) {
      ByteRange colRange = new SimpleMutableByteRange(CellUtil.cloneQualifier(col));
      inputs.add(colRange);
      sortedColumns.add(colRange);
    }
    for (ByteRange col : sortedColumns.compile().getSortedRanges()) {
      outputs.add(col);
    }
  }

  @Override
  public List<ByteRange> getInputs() {
    return inputs;
  }

  @Override
  public List<ByteRange> getOutputs() {
    return outputs;
  }

}
@@ -1,52 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.column.data;

import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.column.TestColumnData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestColumnDataSimple implements TestColumnData {

  @Override
  public List<ByteRange> getInputs() {
    List<String> d = Lists.newArrayList();
    d.add("abc");
    d.add("abcde");
    d.add("abc");
    d.add("bbc");
    d.add("abc");
    return ByteRangeUtils.fromArrays(Bytes.getUtf8ByteArrays(d));
  }

  @Override
  public List<ByteRange> getOutputs() {
    List<String> d = Lists.newArrayList();
    d.add("abc");
    d.add("abcde");
    d.add("bbc");
    return ByteRangeUtils.fromArrays(Bytes.getUtf8ByteArrays(d));
  }

}
@@ -1,54 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row;

import java.util.List;

import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public abstract class BaseTestRowData implements TestRowData {

  @Override
  public List<Integer> getRowStartIndexes() {
    List<Integer> rowStartIndexes = Lists.newArrayList();
    rowStartIndexes.add(0);
    List<KeyValue> inputs = getInputs();
    for (int i = 1; i < inputs.size(); ++i) {
      KeyValue lastKv = inputs.get(i - 1);
      KeyValue kv = inputs.get(i);
      if (!CellUtil.matchingRows(lastKv, kv)) {
        rowStartIndexes.add(i);
      }
    }
    return rowStartIndexes;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
  }
}
@@ -1,229 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearchWithPrefix;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestPrefixTreeSearcher {

  protected static int BLOCK_START = 7;

  @Parameters
  public static Collection<Object[]> parameters() {
    return TestRowData.InMemory.getAllAsObjectArray();
  }

  protected TestRowData rows;
  protected ByteBuff block;

  public TestPrefixTreeSearcher(TestRowData testRows) throws IOException {
    this.rows = testRows;
    ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
    PrefixTreeEncoder kvBuilder = new PrefixTreeEncoder(os, true);
    for (KeyValue kv : rows.getInputs()) {
      kvBuilder.write(kv);
    }
    kvBuilder.flush();
    byte[] outputBytes = os.toByteArray();
    ByteBuffer out = ByteBuffer.allocateDirect(outputBytes.length);
    ByteBufferUtils.copyFromArrayToBuffer(out, outputBytes, 0, outputBytes.length);
    out.position(0);
    this.block = new SingleByteBuff(out);
  }

  @Test
  public void testScanForwards() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);

      int i = -1;
      while (searcher.advance()) {
        ++i;
        KeyValue inputCell = rows.getInputs().get(i);
        Cell outputCell = searcher.current();

        // check all 3 permutations of equals()
        Assert.assertEquals(inputCell, outputCell);
        Assert.assertEquals(outputCell, inputCell);
        Assert.assertTrue(CellUtil.equals(inputCell, outputCell));
      }
      Assert.assertEquals(rows.getInputs().size(), i + 1);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testScanBackwards() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      searcher.positionAfterLastCell();
      int i = -1;
      while (searcher.previous()) {
        ++i;
        int oppositeIndex = rows.getInputs().size() - i - 1;
        KeyValue inputKv = rows.getInputs().get(oppositeIndex);
        KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
        Assert.assertEquals(inputKv, outputKv);
      }
      Assert.assertEquals(rows.getInputs().size(), i + 1);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testRandomSeekHits() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      for (KeyValue kv : rows.getInputs()) {
        boolean hit = searcher.positionAt(kv);
        Assert.assertTrue(hit);
        Cell foundKv = searcher.current();
        Assert.assertTrue(CellUtil.equals(kv, foundKv));
      }
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testRandomSeekMisses() throws IOException {
    CellSearcher searcher = null;
    List<Integer> rowStartIndexes = rows.getRowStartIndexes();
    try {
      searcher = DecoderFactory.checkOut(block, true);

      // test both the positionAtOrBefore and positionAtOrAfter methods
      for (boolean beforeVsAfterOnMiss : new boolean[] { true, false }) {
        for (int i = 0; i < rows.getInputs().size(); ++i) {
          KeyValue kv = rows.getInputs().get(i);

          // nextRow
          Cell inputNextRow = PrivateCellUtil.createFirstOnNextRow(kv);

          CellScannerPosition position = beforeVsAfterOnMiss
              ? searcher.positionAtOrBefore(inputNextRow)
              : searcher.positionAtOrAfter(inputNextRow);
          boolean isFirstInRow = rowStartIndexes.contains(i);
          if (isFirstInRow) {
            int rowIndex = rowStartIndexes.indexOf(i);
            if (rowIndex < rowStartIndexes.size() - 1) {
              if (beforeVsAfterOnMiss) {
                Assert.assertEquals(CellScannerPosition.BEFORE, position);
              } else {
                Assert.assertEquals(CellScannerPosition.AFTER, position);
              }

              int expectedInputIndex = beforeVsAfterOnMiss
                  ? rowStartIndexes.get(rowIndex + 1) - 1
                  : rowStartIndexes.get(rowIndex + 1);
              Assert.assertEquals(rows.getInputs().get(expectedInputIndex), searcher.current());
            }
          }

          // previous KV
          KeyValue inputPreviousKv = KeyValueUtil.previousKey(kv);
          boolean hit = searcher.positionAt(inputPreviousKv);
          Assert.assertFalse(hit);
          position = searcher.positionAtOrAfter(inputPreviousKv);
          if (CollectionUtils.isLastIndex(rows.getInputs(), i)) {
            Assert.assertTrue(CellScannerPosition.AFTER_LAST == position);
          } else {
            Assert.assertTrue(CellScannerPosition.AFTER == position);
            /*
             * TODO: why i+1 instead of i?
             */
            Assert.assertEquals(rows.getInputs().get(i + 1), searcher.current());
          }
        }
      }
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testRandomSeekIndividualAssertions() throws IOException {
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      rows.individualSearcherAssertions(searcher);
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }

  @Test
  public void testSeekWithPrefix() throws IOException {
    if (!(rows instanceof TestRowDataSearchWithPrefix)) {
      return;
    }
    CellSearcher searcher = null;
    try {
      searcher = DecoderFactory.checkOut(block, true);
      // seek with half bytes of second row key, should return second row
      KeyValue kv = rows.getInputs().get(1);
      KeyValue firstKVOnRow = KeyValueUtil.createFirstOnRow(Arrays.copyOfRange(
          kv.getRowArray(), kv.getRowOffset(),
          kv.getRowOffset() + kv.getRowLength() / 2));
      CellScannerPosition position = searcher.positionAtOrAfter(firstKVOnRow);
      Assert.assertEquals(CellScannerPosition.AFTER, position);
      Assert.assertEquals(kv, searcher.current());
    } finally {
      DecoderFactory.checkIn(searcher);
    }
  }
}
@@ -1,105 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataComplexQualifiers;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataDeeper;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataDifferentTimestamps;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataEmpty;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataExerciseFInts;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNub;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearchWithPrefix;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSimple;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivial;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrls;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/*
 * A master class for registering different implementations of TestRowData.
 */
public interface TestRowData {

  List<KeyValue> getInputs();
  List<Integer> getRowStartIndexes();

  void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta);

  void individualSearcherAssertions(CellSearcher searcher);

  static class InMemory {

    /*
     * The following are different styles of data that the codec may encounter. Having these small
     * representations of the data helps pinpoint what is wrong if the encoder breaks.
     */
    public static Collection<TestRowData> getAll() {
      List<TestRowData> all = Lists.newArrayList();
      //simple
      all.add(new TestRowDataEmpty());
      all.add(new TestRowDataTrivial());
      all.add(new TestRowDataTrivialWithTags());
      all.add(new TestRowDataSimple());
      all.add(new TestRowDataDeeper());

      //more specific
      all.add(new TestRowDataSingleQualifier());
      // all.add(new TestRowDataMultiFamilies());//multiple families disabled in PrefixTreeEncoder
      all.add(new TestRowDataNub());
      all.add(new TestRowDataSearcherRowMiss());
      all.add(new TestRowDataQualifierByteOrdering());
      all.add(new TestRowDataComplexQualifiers());
      all.add(new TestRowDataDifferentTimestamps());

      //larger data volumes (hard to debug)
      all.add(new TestRowDataNumberStrings());
      all.add(new TestRowDataUrls());
      all.add(new TestRowDataUrlsExample());
      all.add(new TestRowDataExerciseFInts());
      all.add(new TestRowDataRandomKeyValues());
      all.add(new TestRowDataRandomKeyValuesWithTags());

      //test data for HBase-12078
      all.add(new TestRowDataSearchWithPrefix());
      return all;
    }

    public static Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      for (TestRowData testRows : getAll()) {
        all.add(new Object[] { testRows });
      }
      return all;
    }
  }
}
@ -1,194 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.codec.prefixtree.row;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||
import org.apache.hadoop.hbase.nio.ByteBuff;
|
||||
import org.apache.hadoop.hbase.nio.SingleByteBuff;
|
||||
import org.apache.hadoop.hbase.testclassification.MiscTests;
|
||||
import org.apache.hadoop.hbase.testclassification.SmallTests;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
|
||||
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.junit.runners.Parameterized.Parameters;
|
||||
|
||||
import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
|
||||
|
||||
@Category({MiscTests.class, SmallTests.class})
|
||||
@RunWith(Parameterized.class)
|
||||
public class TestRowEncoder {
|
||||
|
||||
protected static int BLOCK_START = 7;
|
||||
|
||||
@Parameters
|
||||
public static Collection<Object[]> parameters() {
|
||||
return TestRowData.InMemory.getAllAsObjectArray();
|
||||
}
|
||||
|
||||
protected TestRowData rows;
|
||||
protected List<KeyValue> inputKvs;
|
||||
protected boolean includeMemstoreTS = true;
|
||||
protected ByteArrayOutputStream os;
|
||||
protected PrefixTreeEncoder encoder;
|
||||
protected int totalBytes;
|
||||
protected PrefixTreeBlockMeta blockMetaWriter;
|
||||
protected byte[] outputBytes;
|
||||
protected ByteBuff buffer;
|
||||
  protected ByteArrayInputStream is;
  protected PrefixTreeBlockMeta blockMetaReader;
  protected byte[] inputBytes;
  protected PrefixTreeArraySearcher searcher;

  public TestRowEncoder(TestRowData testRows) {
    this.rows = testRows;
  }

  @Before
  public void compile() throws IOException {
    // Always run with tags. But should also ensure that KVs without tags work fine
    os = new ByteArrayOutputStream(1 << 20);
    encoder = new PrefixTreeEncoder(os, includeMemstoreTS);

    inputKvs = rows.getInputs();
    for (KeyValue kv : inputKvs) {
      encoder.write(kv);
    }
    encoder.flush();
    totalBytes = encoder.getTotalBytes();
    blockMetaWriter = encoder.getBlockMeta();
    outputBytes = os.toByteArray();

    // start reading, but save the assertions for @Test methods
    ByteBuffer out = ByteBuffer.allocateDirect(outputBytes.length);
    ByteBufferUtils.copyFromArrayToBuffer(out, outputBytes, 0, outputBytes.length);
    out.position(0);
    buffer = new SingleByteBuff(out);
    blockMetaReader = new PrefixTreeBlockMeta(buffer);

    searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(),
        blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength(),
        blockMetaReader.getMaxTagsLength());
    searcher.initOnBlock(blockMetaReader, buffer, includeMemstoreTS);
  }

  @Test
  public void testEncoderOutput() throws IOException {
    Assert.assertEquals(totalBytes, outputBytes.length);
    Assert.assertEquals(blockMetaWriter, blockMetaReader);
  }

  @Test
  public void testForwardScanner() {
    int counter = -1;
    while (searcher.advance()) {
      ++counter;
      KeyValue inputKv = rows.getInputs().get(counter);
      KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    // assert same number of cells
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  /**
   * probably not needed since testReverseScannerWithJitter() below is more thorough
   */
  @Test
  public void testReverseScanner() {
    searcher.positionAfterLastCell();
    int counter = -1;
    while (searcher.previous()) {
      ++counter;
      int oppositeIndex = rows.getInputs().size() - counter - 1;
      KeyValue inputKv = rows.getInputs().get(oppositeIndex);
      KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  /**
   * Exercise the nubCellsRemain variable by calling next+previous. NubCellsRemain is basically
   * a special fan index.
   */
  @Test
  public void testReverseScannerWithJitter() {
    searcher.positionAfterLastCell();
    int counter = -1;
    while (true) {
      boolean foundCell = searcher.previous();
      if (!foundCell) {
        break;
      }
      ++counter;

      // a next+previous should cancel out
      if (!searcher.isAfterLast()) {
        searcher.advance();
        searcher.previous();
      }

      int oppositeIndex = rows.getInputs().size() - counter - 1;
      KeyValue inputKv = rows.getInputs().get(oppositeIndex);
      KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current());
      assertKeyAndValueEqual(inputKv, outputKv);
    }
    Assert.assertEquals(rows.getInputs().size(), counter + 1);
  }

  @Test
  public void testIndividualBlockMetaAssertions() {
    rows.individualBlockMetaAssertions(blockMetaReader);
  }

  /**************** helper **************************/

  protected void assertKeyAndValueEqual(Cell expected, Cell actual) {
    // assert keys are equal (doesn't compare values)
    Assert.assertEquals(expected, actual);
    if (includeMemstoreTS) {
      Assert.assertEquals(expected.getSequenceId(), actual.getSequenceId());
    }
    // assert values equal
    Assert.assertTrue(Bytes.equals(expected.getValueArray(), expected.getValueOffset(),
        expected.getValueLength(), actual.getValueArray(), actual.getValueOffset(),
        actual.getValueLength()));
  }

}
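
The compile() fixture above is the whole prefix-tree round trip in miniature: write sorted KeyValues through PrefixTreeEncoder, copy the finished block into a direct buffer, then open a PrefixTreeArraySearcher over it. A minimal standalone sketch of that flow, assuming the pre-removal prefix-tree module is still on the classpath (the decode-side import paths are an assumption, not quoted from this diff):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; // assumed path
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferUtils;

public class PrefixTreeRoundTripSketch {

  /** Encode the (already sorted) cells and return a searcher positioned before the first one. */
  static PrefixTreeArraySearcher encodeThenOpen(List<KeyValue> kvs, boolean includeMvcc)
      throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
    PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, includeMvcc);
    for (KeyValue kv : kvs) {
      encoder.write(kv); // tokenizes rows/families/qualifiers into the trie builders
    }
    encoder.flush(); // writes the block meta plus the serialized node structure

    byte[] block = os.toByteArray();
    ByteBuffer direct = ByteBuffer.allocateDirect(block.length);
    ByteBufferUtils.copyFromArrayToBuffer(direct, block, 0, block.length);
    direct.position(0);
    SingleByteBuff buff = new SingleByteBuff(direct);

    PrefixTreeBlockMeta meta = new PrefixTreeBlockMeta(buff);
    PrefixTreeArraySearcher searcher = new PrefixTreeArraySearcher(meta, meta.getRowTreeDepth(),
        meta.getMaxRowLength(), meta.getMaxQualifierLength(), meta.getMaxTagsLength());
    searcher.initOnBlock(meta, buff, includeMvcc);
    return searcher; // iterate with advance(), or jump with the positionAt*() methods
  }
}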

@@ -1,67 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataComplexQualifiers extends BaseTestRowData {

  static byte[]
      Arow = Bytes.toBytes("Arow"),
      cf = PrefixTreeTestConstants.TEST_CF,
      v0 = Bytes.toBytes("v0");

  static List<byte[]> qualifiers = Lists.newArrayList();
  static {
    List<String> qualifierStrings = Lists.newArrayList();
    qualifierStrings.add("cq");
    qualifierStrings.add("cq0");
    qualifierStrings.add("cq1");
    qualifierStrings.add("cq2");
    qualifierStrings.add("dq0"); // second root level fan
    qualifierStrings.add("dq1"); // nub
    qualifierStrings.add("dq111"); // leaf on nub
    qualifierStrings.add("dq11111a"); // leaf on leaf
    for (String s : qualifierStrings) {
      qualifiers.add(Bytes.toBytes(s));
    }
  }

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    for (byte[] qualifier : qualifiers) {
      d.add(new KeyValue(Arow, cf, qualifier, ts, v0));
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,85 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/*
 * Goes beyond a trivial trie to add a branch on the "cf" node
 */
public class TestRowDataDeeper extends BaseTestRowData {

  static byte[]
      cdc = Bytes.toBytes("cdc"),
      cf6 = Bytes.toBytes("cf6"),
      cfc = Bytes.toBytes("cfc"),
      f = Bytes.toBytes("f"),
      q = Bytes.toBytes("q"),
      v = Bytes.toBytes("v");

  static long
      ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(cdc, f, q, ts, v));
    d.add(new KeyValue(cf6, f, q, ts, v));
    d.add(new KeyValue(cfc, f, q, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    // 0: token:c; fan:d,f
    // 1: token:f; fan:6,c
    // 2: leaves
    Assert.assertEquals(3, blockMeta.getRowTreeDepth());
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    /**
     * Position at or after the first possible cell of row "cfc". There is no exact match, so
     * the searcher should report AFTER, landing on the "cfc" cell; previous() should then step
     * back to the "cf6" cell.
     */
    KeyValue cfcRow = KeyValueUtil.createFirstOnRow(Bytes.toBytes("cfc"));
    CellScannerPosition position = searcher.positionAtOrAfter(cfcRow);
    Assert.assertEquals(CellScannerPosition.AFTER, position);
    Assert.assertEquals(d.get(2), searcher.current());
    searcher.previous();
    Assert.assertEquals(d.get(1), searcher.current());
  }
}
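
The three rows above produce the depth-3 row trie that individualBlockMetaAssertions() counts; sketched here purely from the token/fan comments in that method:

// Row trie for {cdc, cf6, cfc}:
//
//   depth 0: token "c", fan {d, f}
//   depth 1:   "d" branch -> leaf for row cdc
//              "f" branch -> fan {6, c}
//   depth 2:     "6" -> leaf for row cf6
//                "c" -> leaf for row cfc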

@@ -1,94 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/*
 * test different timestamps
 */
public class TestRowDataDifferentTimestamps extends BaseTestRowData {

  static byte[]
      Arow = Bytes.toBytes("Arow"),
      Brow = Bytes.toBytes("Brow"),
      cf = Bytes.toBytes("fammy"),
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1"),
      v0 = Bytes.toBytes("v0");

  static List<KeyValue> d = Lists.newArrayList();
  static {
    KeyValue kv0 = new KeyValue(Arow, cf, cq0, 0L, v0);
    kv0.setSequenceId(123456789L);
    d.add(kv0);

    KeyValue kv1 = new KeyValue(Arow, cf, cq1, 1L, v0);
    kv1.setSequenceId(3L);
    d.add(kv1);

    KeyValue kv2 = new KeyValue(Brow, cf, cq0, 12345678L, v0);
    kv2.setSequenceId(65537L);
    d.add(kv2);

    // watch out... Long.MAX_VALUE comes back as 1332221664203, even with other encoders
    // d.add(new KeyValue(Brow, cf, cq1, Long.MAX_VALUE, v0));
    KeyValue kv3 = new KeyValue(Brow, cf, cq1, Long.MAX_VALUE - 1, v0);
    kv3.setSequenceId(1L);
    d.add(kv3);

    KeyValue kv4 = new KeyValue(Brow, cf, cq1, 999999999, v0);
    // don't set memstoreTS
    d.add(kv4);

    KeyValue kv5 = new KeyValue(Brow, cf, cq1, 12345, v0);
    kv5.setSequenceId(0L);
    d.add(kv5);
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    Assert.assertTrue(blockMeta.getNumMvccVersionBytes() > 0);
    Assert.assertEquals(12, blockMeta.getNumValueBytes());

    Assert.assertFalse(blockMeta.isAllSameTimestamp());
    Assert.assertNotNull(blockMeta.getMinTimestamp());
    Assert.assertTrue(blockMeta.getTimestampIndexWidth() > 0);
    Assert.assertTrue(blockMeta.getTimestampDeltaWidth() > 0);

    Assert.assertFalse(blockMeta.isAllSameMvccVersion());
    Assert.assertNotNull(blockMeta.getMinMvccVersion());
    Assert.assertTrue(blockMeta.getMvccVersionIndexWidth() > 0);
    Assert.assertTrue(blockMeta.getMvccVersionDeltaWidth() > 0);
  }

}
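
The width assertions above follow from delta encoding: timestamps are stored as offsets from the block's minimum, so distinct timestamps force a positive per-cell delta width. A back-of-the-envelope check of the byte width, using this class's own timestamps (the rounding formula is an assumption about the scheme, not a quote of the deleted writer internals):

public final class TimestampDeltaWidthSketch {

  /** Bytes needed to store value as an unsigned fixed-width integer (at least one). */
  static int numBytes(long value) {
    if (value == 0) {
      return 1;
    }
    return (64 - Long.numberOfLeadingZeros(value) + 7) / 8;
  }

  public static void main(String[] args) {
    // Timestamps used by TestRowDataDifferentTimestamps:
    long[] timestamps = { 0L, 1L, 12345678L, Long.MAX_VALUE - 1, 999999999L, 12345L };
    long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
    for (long ts : timestamps) {
      min = Math.min(min, ts);
      max = Math.max(max, ts);
    }
    // min is 0, so the largest delta is Long.MAX_VALUE - 1: an 8-byte delta field.
    System.out.println(numBytes(max - min)); // prints 8
  }
}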

@@ -1,43 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataEmpty extends BaseTestRowData {

  private static byte[] b = new byte[0];

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(b, b, b, 0L, Type.Put, b));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,115 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.junit.Assert;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/*
 * Exercise fixed-width integers (fInts): use enough rows and long enough qualifiers that the
 * block's next-node and qualifier offset fields each need more than one byte.
 *
 * http://pastebin.com/7ks8kzJ2
 * http://pastebin.com/MPn03nsK
 */
public class TestRowDataExerciseFInts extends BaseTestRowData {

  static List<ByteRange> rows;
  static {
    List<String> rowStrings = new ArrayList<>(16);
    rowStrings.add("com.edsBlog/directoryAa/pageAaa");
    rowStrings.add("com.edsBlog/directoryAa/pageBbb");
    rowStrings.add("com.edsBlog/directoryAa/pageCcc");
    rowStrings.add("com.edsBlog/directoryAa/pageDdd");
    rowStrings.add("com.edsBlog/directoryBb/pageEee");
    rowStrings.add("com.edsBlog/directoryBb/pageFff");
    rowStrings.add("com.edsBlog/directoryBb/pageGgg");
    rowStrings.add("com.edsBlog/directoryBb/pageHhh");
    rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
    rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
    rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
    rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
    rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
    rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
    rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
    rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
    ByteRangeTreeSet ba = new ByteRangeTreeSet();
    for (String row : rowStrings) {
      ba.add(new SimpleMutableByteRange(Bytes.toBytes(row)));
    }
    rows = ba.compile().getSortedRanges();
  }

  static List<String> cols = Lists.newArrayList();
  static {
    cols.add("Chrome");
    cols.add("Chromeb");
    cols.add("Firefox");
    cols.add("InternetExplorer");
    cols.add("Opera");
    cols.add("Safari");
    cols.add("Z1stBrowserWithHuuuuuuuuuuuugeQualifier");
    cols.add("Z2ndBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z3rdBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z4thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z5thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z6thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z7thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z8thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
    cols.add("Z9thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore");
  }

  static long ts = 1234567890;

  static int MAX_VALUE = 50;

  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (ByteRange row : rows) {
      for (String col : cols) {
        KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
            Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
        kvs.add(kv);
      }
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    Assert.assertTrue(blockMeta.getNextNodeOffsetWidth() > 1);
    Assert.assertTrue(blockMeta.getQualifierOffsetWidth() > 1);
  }

}

@@ -1,60 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataMultiFamilies extends BaseTestRowData {

  static byte[]
      rowA = Bytes.toBytes("rowA"),
      rowB = Bytes.toBytes("rowB"),
      famA = Bytes.toBytes("famA"),
      famB = Bytes.toBytes("famB"),
      famBB = Bytes.toBytes("famBB"),
      q0 = Bytes.toBytes("q0"),
      q1 = Bytes.toBytes("q1"), // start with a different character
      vvv = Bytes.toBytes("vvv");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, famA, q0, ts, vvv));
    d.add(new KeyValue(rowA, famB, q1, ts, vvv));
    d.add(new KeyValue(rowA, famBB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famA, q0, ts, vvv));
    d.add(new KeyValue(rowB, famA, q1, ts, vvv));
    d.add(new KeyValue(rowB, famB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famBB, q0, ts, vvv));
    d.add(new KeyValue(rowB, famBB, q1, ts, vvv));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,59 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataNub extends BaseTestRowData {

  static byte[]
      rowA = Bytes.toBytes("rowA"),
      rowB = Bytes.toBytes("rowB"), // nub
      rowBB = Bytes.toBytes("rowBB"),
      cf = PrefixTreeTestConstants.TEST_CF,
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1"),
      v0 = Bytes.toBytes("v0");

  static long
      ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, cf, cq0, ts, v0));
    d.add(new KeyValue(rowA, cf, cq1, ts, v0));
    d.add(new KeyValue(rowB, cf, cq0, ts, v0));
    d.add(new KeyValue(rowB, cf, cq1, ts, v0));
    d.add(new KeyValue(rowBB, cf, cq0, ts, v0));
    d.add(new KeyValue(rowBB, cf, cq1, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,61 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataNumberStrings extends BaseTestRowData {

  static List<KeyValue> d = Lists.newArrayList();
  static {

    /**
     * Test a string-encoded list of numbers. 0, 1, 10, 11 will sort as 0, 1, 10, 11 when
     * compared as strings.
     * <p/>
     * This helped catch a bug with reverse scanning where it was jumping from the last leaf cell
     * to the previous nub. It should do 11->10, but it was incorrectly doing 11->1.
     */
    List<Integer> problematicSeries = Lists.newArrayList(0, 1, 10, 11); // sort this at the end
    for (Integer i : problematicSeries) {
      // for(int i=0; i < 13; ++i){
      byte[] row = Bytes.toBytes("" + i);
      byte[] family = Bytes.toBytes("F");
      byte[] column = Bytes.toBytes("C");
      byte[] value = Bytes.toBytes("V");

      d.add(new KeyValue(row, family, column, 0L, Type.Put, value));
    }
    Collections.sort(d, CellComparatorImpl.COMPARATOR);
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}
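
The ordering that comment depends on is plain byte-lexicographic comparison, where "1" sorts before "10" because it is a strict prefix of it. A standalone check of that ordering using the same Bytes utility:

import java.util.Arrays;

import org.apache.hadoop.hbase.util.Bytes;

public class StringNumberOrderCheck {
  public static void main(String[] args) {
    byte[][] rows =
        { Bytes.toBytes("11"), Bytes.toBytes("1"), Bytes.toBytes("10"), Bytes.toBytes("0") };
    Arrays.sort(rows, Bytes.BYTES_COMPARATOR); // byte-lexicographic, like HBase row ordering
    for (byte[] row : rows) {
      System.out.println(Bytes.toString(row)); // prints 0, 1, 10, 11
    }
  }
}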

@@ -1,58 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataQualifierByteOrdering extends BaseTestRowData {

  static byte[]
      Arow = Bytes.toBytes("Arow"),
      Brow = Bytes.toBytes("Brow"),
      Brow2 = Bytes.toBytes("Brow2"),
      fam = Bytes.toBytes("HappyFam"),
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1tail"), // make sure tail does not come back as liat
      cq2 = Bytes.toBytes("cq2"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(Arow, fam, cq0, ts, v0));
    d.add(new KeyValue(Arow, fam, cq1, ts, v0));
    d.add(new KeyValue(Brow, fam, cq0, ts, v0));
    d.add(new KeyValue(Brow, fam, cq2, ts, v0));
    d.add(new KeyValue(Brow2, fam, cq1, ts, v0));
    d.add(new KeyValue(Brow2, fam, cq2, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,42 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.RedundantKVGenerator;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataRandomKeyValues extends BaseTestRowData {

  static List<KeyValue> d = Lists.newArrayList();
  static RedundantKVGenerator generator = new RedundantKVGenerator();
  static {
    d = generator.generateTestKeyValues(1 << 10);
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,41 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.RedundantKVGenerator;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/**
 * Generated KVs with tags
 */
public class TestRowDataRandomKeyValuesWithTags extends BaseTestRowData {

  static List<KeyValue> d = Lists.newArrayList();
  static RedundantKVGenerator generator = new RedundantKVGenerator();
  static {
    d = generator.generateTestKeyValues(1 << 10, true);
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,74 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataSearchWithPrefix extends BaseTestRowData {

  static byte[] cf = Bytes.toBytes("cf");

  static byte[] cq = Bytes.toBytes("cq");

  static byte[] v = Bytes.toBytes("v");

  static List<KeyValue> d = Lists.newArrayList();

  static long ts = 55L;

  static byte[] createRowKey(int keyPart1, int keyPart2) {
    ByteArrayOutputStream bos = new ByteArrayOutputStream(16);
    DataOutputStream dos = new DataOutputStream(bos);
    try {
      dos.writeInt(keyPart1);
      dos.writeInt(keyPart2);
    } catch (IOException e) {
      // should not happen
      throw new RuntimeException(e);
    }

    return bos.toByteArray();
  }

  static {
    d.add(new KeyValue(createRowKey(1, 12345), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x01000000), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x01010000), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x02000000), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x02020000), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x03000000), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x03030000), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x04000000), cf, cq, ts, v));
    d.add(new KeyValue(createRowKey(12345, 0x04040000), cf, cq, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}
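
createRowKey() above leans on DataOutputStream.writeInt being big-endian: the two int parts concatenate into an 8-byte key whose byte order matches numeric (part1, part2) order for the non-negative parts used here. A sketch of the same construction via Bytes, which should be equivalent (the equivalence is an assumption worth checking, not something the deleted test asserts):

import org.apache.hadoop.hbase.util.Bytes;

public class BigEndianRowKeySketch {

  /** Same shape as createRowKey(): two big-endian 4-byte ints, concatenated. */
  static byte[] rowKey(int keyPart1, int keyPart2) {
    // Bytes.toBytes(int) is big-endian, matching DataOutputStream.writeInt
    return Bytes.add(Bytes.toBytes(keyPart1), Bytes.toBytes(keyPart2));
  }

  public static void main(String[] args) {
    byte[] lower = rowKey(12345, 0x01000000);
    byte[] higher = rowKey(12345, 0x02000000);
    System.out.println(Bytes.compareTo(lower, higher) < 0); // true: byte order tracks numeric order
  }
}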

@@ -1,128 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataSearcherRowMiss extends BaseTestRowData {

  static byte[]
      // rows chosen so that some are prefixes of others (A, AA, AAA), plus a separate branch (B)
      A = Bytes.toBytes("A"),
      AA = Bytes.toBytes("AA"),
      AAA = Bytes.toBytes("AAA"),
      B = Bytes.toBytes("B"),
      cf = Bytes.toBytes("fam"),
      cq = Bytes.toBytes("cq0"),
      v = Bytes.toBytes("v0");

  static long
      ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(A, cf, cq, ts, v));
    d.add(new KeyValue(AA, cf, cq, ts, v));
    d.add(new KeyValue(AAA, cf, cq, ts, v));
    d.add(new KeyValue(B, cf, cq, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    assertRowOffsetsCorrect();

    searcher.resetToBeforeFirstEntry();

    // test first cell
    try {
      searcher.advance();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    Cell first = searcher.current();
    Assert.assertTrue(CellUtil.equals(d.get(0), first));

    // test first cell in second row
    Assert.assertTrue(searcher.positionAt(d.get(1)));
    Assert.assertTrue(CellUtil.equals(d.get(1), searcher.current()));

    testBetween1and2(searcher);
    testBetween2and3(searcher);
  }

  /************ private methods, call from above *******************/

  private void assertRowOffsetsCorrect() {
    Assert.assertEquals(4, getRowStartIndexes().size());
  }

  private void testBetween1and2(CellSearcher searcher) {
    CellScannerPosition p; // reuse
    Cell betweenAAndAAA = new KeyValue(AA, cf, cq, ts - 2, v);

    // test exact
    Assert.assertFalse(searcher.positionAt(betweenAAndAAA));

    // test atOrBefore
    p = searcher.positionAtOrBefore(betweenAAndAAA);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(1)));

    // test atOrAfter
    p = searcher.positionAtOrAfter(betweenAAndAAA);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(2)));
  }

  private void testBetween2and3(CellSearcher searcher) {
    CellScannerPosition p; // reuse
    Cell betweenAAAndB = new KeyValue(AAA, cf, cq, ts - 2, v);

    // test exact
    Assert.assertFalse(searcher.positionAt(betweenAAAndB));

    // test atOrBefore
    p = searcher.positionAtOrBefore(betweenAAAndB);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(2)));

    // test atOrAfter
    p = searcher.positionAtOrAfter(betweenAAAndB);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(3)));
  }

}

@@ -1,117 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.junit.Assert;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataSimple extends BaseTestRowData {

  static byte[]
      // don't let the rows share any common prefix bytes
      rowA = Bytes.toBytes("Arow"),
      rowB = Bytes.toBytes("Brow"),
      cf = Bytes.toBytes("fam"),
      cq0 = Bytes.toBytes("cq0"),
      cq1 = Bytes.toBytes("cq1tail"), // make sure tail does not come back as liat
      cq2 = Bytes.toBytes("dcq2"), // start with a different character
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, cf, cq0, ts, v0));
    d.add(new KeyValue(rowA, cf, cq1, ts, v0));
    d.add(new KeyValue(rowA, cf, cq2, ts, v0));
    d.add(new KeyValue(rowB, cf, cq0, ts, v0));
    d.add(new KeyValue(rowB, cf, cq1, ts, v0));
    d.add(new KeyValue(rowB, cf, cq2, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    CellScannerPosition p; // reuse
    searcher.resetToBeforeFirstEntry();

    // test first cell
    try {
      searcher.advance();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    Cell first = searcher.current();
    Assert.assertTrue(CellUtil.equals(d.get(0), first));

    // test first cell in second row
    Assert.assertTrue(searcher.positionAt(d.get(3)));
    Assert.assertTrue(CellUtil.equals(d.get(3), searcher.current()));

    Cell between4And5 = new KeyValue(rowB, cf, cq1, ts - 2, v0);

    // test exact
    Assert.assertFalse(searcher.positionAt(between4And5));

    // test atOrBefore
    p = searcher.positionAtOrBefore(between4And5);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(4)));

    // test atOrAfter
    p = searcher.positionAtOrAfter(between4And5);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(5)));

    // test when key falls before first key in block
    Cell beforeFirst = new KeyValue(Bytes.toBytes("A"), cf, cq0, ts, v0);
    Assert.assertFalse(searcher.positionAt(beforeFirst));
    p = searcher.positionAtOrBefore(beforeFirst);
    Assert.assertEquals(CellScannerPosition.BEFORE_FIRST, p);
    p = searcher.positionAtOrAfter(beforeFirst);
    Assert.assertEquals(CellScannerPosition.AFTER, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), d.get(0)));
    Assert.assertEquals(d.get(0), searcher.current());

    // test when key falls after last key in block
    Cell afterLast = new KeyValue(Bytes.toBytes("z"), cf, cq0, ts, v0); // must be lower case z
    Assert.assertFalse(searcher.positionAt(afterLast));
    p = searcher.positionAtOrAfter(afterLast);
    Assert.assertEquals(CellScannerPosition.AFTER_LAST, p);
    p = searcher.positionAtOrBefore(afterLast);
    Assert.assertEquals(CellScannerPosition.BEFORE, p);
    Assert.assertTrue(CellUtil.equals(searcher.current(), CollectionUtils.getLast(d)));
  }

}
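
Taken together, the assertions above pin down the searcher's tri-state positioning contract for a probe cell absent from the block; restated compactly, covering only what these tests actually check:

// For a probe cell k with no exact match in the block:
//   positionAt(k)         -> false (exact positioning only)
//   positionAtOrBefore(k) -> BEFORE with current() = last cell sorting before k,
//                            or BEFORE_FIRST when k precedes every cell in the block
//   positionAtOrAfter(k)  -> AFTER with current() = first cell sorting after k,
//                            or AFTER_LAST when k follows every cell in the block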

@@ -1,52 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataSingleQualifier extends BaseTestRowData {

  static byte[]
      rowA = Bytes.toBytes("rowA"),
      rowB = Bytes.toBytes("rowB"),
      cf = PrefixTreeTestConstants.TEST_CF,
      cq0 = Bytes.toBytes("cq0"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rowA, cf, cq0, ts, v0));
    d.add(new KeyValue(rowB, cf, cq0, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

}

@@ -1,74 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataTrivial extends BaseTestRowData {

  static byte[]
      rA = Bytes.toBytes("rA"),
      rB = Bytes.toBytes("rB"), // turn "r" into a branch for the Searcher tests
      cf = Bytes.toBytes("fam"),
      cq0 = Bytes.toBytes("q0"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    d.add(new KeyValue(rA, cf, cq0, ts, v0));
    d.add(new KeyValue(rB, cf, cq0, ts, v0));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    // node[0] -> root[r]
    // node[1] -> leaf[A], etc
    Assert.assertEquals(2, blockMeta.getRowTreeDepth());
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    /**
     * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only
     * rA, but rB as well.
     */
    KeyValue afterLast = KeyValueUtil.createFirstOnRow(Bytes.toBytes("zzz"));
    CellScannerPosition position = searcher.positionAtOrAfter(afterLast);
    Assert.assertEquals(CellScannerPosition.AFTER_LAST, position);
    Assert.assertNull(searcher.current());
  }
}

@@ -1,81 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public class TestRowDataTrivialWithTags extends BaseTestRowData {

  static byte[]
      rA = Bytes.toBytes("rA"),
      rB = Bytes.toBytes("rB"), // turn "r" into a branch for the Searcher tests
      cf = Bytes.toBytes("fam"),
      cq0 = Bytes.toBytes("q0"),
      v0 = Bytes.toBytes("v0");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    List<Tag> tagList = new ArrayList<>(2);
    Tag t = new ArrayBackedTag((byte) 1, "visibility");
    tagList.add(t);
    t = new ArrayBackedTag((byte) 2, "ACL");
    tagList.add(t);
    d.add(new KeyValue(rA, cf, cq0, ts, v0, tagList));
    d.add(new KeyValue(rB, cf, cq0, ts, v0, tagList));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }

  @Override
  public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
    // node[0] -> root[r]
    // node[1] -> leaf[A], etc
    Assert.assertEquals(2, blockMeta.getRowTreeDepth());
  }

  @Override
  public void individualSearcherAssertions(CellSearcher searcher) {
    /**
     * The searcher should get a token mismatch on the "r" branch. Assert that
     * it skips not only rA, but rB as well.
     */
    KeyValue afterLast = KeyValueUtil.createFirstOnRow(Bytes.toBytes("zzz"));
    CellScannerPosition position = searcher.positionAtOrAfter(afterLast);
    Assert.assertEquals(CellScannerPosition.AFTER_LAST, position);
    Assert.assertNull(searcher.current());
  }
}

@@ -1,99 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/*
 * Test URL-like row keys that share long prefixes.
 *
 * http://pastebin.com/7ks8kzJ2
 * http://pastebin.com/MPn03nsK
 */
public class TestRowDataUrls extends BaseTestRowData {

  static List<ByteRange> rows;
  static {
    List<String> rowStrings = new ArrayList<>(16);
    rowStrings.add("com.edsBlog/directoryAa/pageAaa");
    rowStrings.add("com.edsBlog/directoryAa/pageBbb");
    rowStrings.add("com.edsBlog/directoryAa/pageCcc");
    rowStrings.add("com.edsBlog/directoryAa/pageDdd");
    rowStrings.add("com.edsBlog/directoryBb/pageEee");
    rowStrings.add("com.edsBlog/directoryBb/pageFff");
    rowStrings.add("com.edsBlog/directoryBb/pageGgg");
    rowStrings.add("com.edsBlog/directoryBb/pageHhh");
    rowStrings.add("com.isabellasBlog/directoryAa/pageAaa");
    rowStrings.add("com.isabellasBlog/directoryAa/pageBbb");
    rowStrings.add("com.isabellasBlog/directoryAa/pageCcc");
    rowStrings.add("com.isabellasBlog/directoryAa/pageDdd");
    rowStrings.add("com.isabellasBlog/directoryBb/pageEee");
    rowStrings.add("com.isabellasBlog/directoryBb/pageFff");
    rowStrings.add("com.isabellasBlog/directoryBb/pageGgg");
    rowStrings.add("com.isabellasBlog/directoryBb/pageHhh");
    ByteRangeTreeSet ba = new ByteRangeTreeSet();
    for (String row : rowStrings) {
      ba.add(new SimpleMutableByteRange(Bytes.toBytes(row)));
    }
    rows = ba.compile().getSortedRanges();
  }

  static List<String> cols = Lists.newArrayList();
  static {
    cols.add("Chrome");
    cols.add("Chromeb");
    cols.add("Firefox");
    cols.add("InternetExplorer");
    cols.add("Opera");
    cols.add("Safari");
  }

  static long ts = 1234567890;

  static int MAX_VALUE = 50;

  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (ByteRange row : rows) {
      for (String col : cols) {
        KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF,
            Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE"));
        kvs.add(kv);
        // System.out.println("TestRows5:"+kv);
      }
    }
  }

  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }

}

@@ -1,126 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.row.data;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnNodeWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowNodeWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

/*
 * Example block of URL-like row keys; main() prints the encoder's internal node structure,
 * which was used for generating documentation.
 *
 * http://pastebin.com/7ks8kzJ2
 * http://pastebin.com/MPn03nsK
 */
public class TestRowDataUrlsExample extends BaseTestRowData {

  static String TENANT_ID = Integer.toString(95322);
  static String APP_ID = Integer.toString(12);
  static List<String> URLS = Lists.newArrayList(
      "com.dablog/2011/10/04/boating",
      "com.dablog/2011/10/09/lasers",
      "com.jamiesrecipes", // this nub helped find a bug
      "com.jamiesrecipes/eggs");
  static String FAMILY = "hits";
  static List<String> BROWSERS = Lists.newArrayList(
      "Chrome", "IE8", "IE9beta"); //, "Opera", "Safari");
  static long TIMESTAMP = 1234567890;

  static int MAX_VALUE = 50;

  static List<KeyValue> kvs = Lists.newArrayList();
  static {
    for (String rowKey : URLS) {
      for (String qualifier : BROWSERS) {
        KeyValue kv = new KeyValue(
            Bytes.toBytes(rowKey),
            Bytes.toBytes(FAMILY),
            Bytes.toBytes(qualifier),
            TIMESTAMP,
            KeyValue.Type.Put,
            Bytes.toBytes("VvvV"));
        kvs.add(kv);
      }
    }
  }

  /**
   * Used for generating docs.
   */
  public static void main(String... args) throws IOException {
    System.out.println("-- inputs --");
    System.out.println(KeyValueTestUtil.toStringWithPadding(kvs, true));
    ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
    PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false);

    for (KeyValue kv : kvs) {
      encoder.write(kv);
    }
    encoder.flush();

    System.out.println("-- qualifier SortedPtBuilderNodes --");
    for (TokenizerNode tokenizer : encoder.getQualifierWriter().getNonLeaves()) {
      System.out.println(tokenizer);
    }
    for (TokenizerNode tokenizerNode : encoder.getQualifierWriter().getLeaves()) {
      System.out.println(tokenizerNode);
    }

    System.out.println("-- qualifier PtColumnNodeWriters --");
    for (ColumnNodeWriter writer : encoder.getQualifierWriter().getColumnNodeWriters()) {
      System.out.println(writer);
    }

    System.out.println("-- rowKey SortedPtBuilderNodes --");
    for (TokenizerNode tokenizerNode : encoder.getRowWriter().getNonLeaves()) {
      System.out.println(tokenizerNode);
    }
    for (TokenizerNode tokenizerNode : encoder.getRowWriter().getLeaves()) {
      System.out.println(tokenizerNode);
    }

    System.out.println("-- row PtRowNodeWriters --");
    for (RowNodeWriter writer : encoder.getRowWriter().getNonLeafWriters()) {
      System.out.println(writer);
    }
    for (RowNodeWriter writer : encoder.getRowWriter().getLeafWriters()) {
      System.out.println(writer);
    }

    System.out.println("-- concatenated values --");
    System.out.println(Bytes.toStringBinary(encoder.getValueByteRange().deepCopyToNewArray()));
  }

  @Override
  public List<KeyValue> getInputs() {
    return kvs;
  }

}
@@ -1,45 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.timestamp;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataBasic;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataNumbers;
import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataRepeats;

import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;

public interface TestTimestampData {

  List<Long> getInputs();
  long getMinimum();
  List<Long> getOutputs();

  class InMemory {
    public Collection<Object[]> getAllAsObjectArray() {
      List<Object[]> all = Lists.newArrayList();
      all.add(new Object[] { new TestTimestampDataBasic() });
      all.add(new Object[] { new TestTimestampDataNumbers() });
      all.add(new Object[] { new TestTimestampDataRepeats() });
      return all;
    }
  }
}
@@ -1,98 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.timestamp;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;

import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@Category({MiscTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestTimestampEncoder {

  @Parameters
  public static Collection<Object[]> parameters() {
    return new TestTimestampData.InMemory().getAllAsObjectArray();
  }

  private TestTimestampData timestamps;
  private PrefixTreeBlockMeta blockMeta;
  private LongEncoder encoder;
  private byte[] bytes;
  private TimestampDecoder decoder;

  public TestTimestampEncoder(TestTimestampData testTimestamps) throws IOException {
    this.timestamps = testTimestamps;
    this.blockMeta = new PrefixTreeBlockMeta();
    this.blockMeta.setNumMetaBytes(0);
    this.blockMeta.setNumRowBytes(0);
    this.blockMeta.setNumQualifierBytes(0);
    this.encoder = new LongEncoder();
    for (Long ts : testTimestamps.getInputs()) {
      encoder.add(ts);
    }
    encoder.compile();
    blockMeta.setTimestampFields(encoder);
    bytes = encoder.getByteArray();
    decoder = new TimestampDecoder();
    decoder.initOnBlock(blockMeta, new SingleByteBuff(ByteBuffer.wrap(bytes)));
  }

  @Test
  public void testCompressorMinimum() {
    Assert.assertEquals(timestamps.getMinimum(), encoder.getMin());
  }

  @Test
  public void testCompressorRoundTrip() {
    long[] outputs = encoder.getSortedUniqueTimestamps();
    for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
      long input = timestamps.getOutputs().get(i);
      long output = outputs[i];
      Assert.assertEquals(input, output);
    }
  }

  @Test
  public void testReaderMinimum() {
    Assert.assertEquals(timestamps.getMinimum(), decoder.getLong(0));
  }

  @Test
  public void testReaderRoundTrip() {
    for (int i = 0; i < timestamps.getOutputs().size(); ++i) {
      long input = timestamps.getOutputs().get(i);
      long output = decoder.getLong(i);
      Assert.assertEquals(input, output);
    }
  }
}
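The test above pins down the contract these fixtures exercise: LongEncoder.add() collects timestamps, compile() de-duplicates and sorts them, getMin() returns the smallest, and the decoder returns the i-th sorted unique value. Below is a minimal self-contained sketch of that contract; the class is illustrative, not the removed HBase implementation. For example, the TestTimestampDataBasic fixture that follows feeds inputs {5, 3, 0, 1, 3} and expects outputs {0, 1, 3, 5} (and TestTimestampDataRepeats returns an empty outputs list, so the round-trip loops above perform no assertions for the all-identical case).

import java.util.Arrays;
import java.util.TreeSet;

// Illustrative stand-in for the removed LongEncoder/TimestampDecoder pair:
// collect timestamps, de-duplicate and sort them once per block, then refer
// to each cell's timestamp by its index into the sorted table.
public final class SortedUniqueLongTable {

  private final TreeSet<Long> values = new TreeSet<>();

  public void add(long value) {
    values.add(value); // TreeSet both de-duplicates and keeps sorted order
  }

  public long[] compile() {
    long[] sorted = new long[values.size()];
    int i = 0;
    for (long v : values) {
      sorted[i++] = v;
    }
    return sorted;
  }

  public long min() {
    return values.first();
  }

  public static void main(String[] args) {
    SortedUniqueLongTable table = new SortedUniqueLongTable();
    for (long ts : new long[] { 5L, 3L, 0L, 1L, 3L }) { // TestTimestampDataBasic inputs
      table.add(ts);
    }
    System.out.println(Arrays.toString(table.compile())); // [0, 1, 3, 5]
    System.out.println(table.min());                      // 0
  }
}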
@@ -1,54 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData;

public class TestTimestampDataBasic implements TestTimestampData {

  @Override
  public List<Long> getInputs() {
    List<Long> d = new ArrayList<>(5);
    d.add(5L);
    d.add(3L);
    d.add(0L);
    d.add(1L);
    d.add(3L);
    return d;
  }

  @Override
  public long getMinimum() {
    return 0L;
  }

  @Override
  public List<Long> getOutputs() {
    List<Long> d = new ArrayList<>(4);
    d.add(0L);
    d.add(1L);
    d.add(3L);
    d.add(5L);
    return d;
  }

}
@@ -1,56 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData;

public class TestTimestampDataNumbers implements TestTimestampData {

  private int shift = 8;

  @Override
  public List<Long> getInputs() {
    List<Long> d = new ArrayList<>(5);
    d.add(5L << shift);
    d.add(3L << shift);
    d.add(7L << shift);
    d.add(1L << shift);
    d.add(3L << shift);
    return d;
  }

  @Override
  public long getMinimum() {
    return 1L << shift;
  }

  @Override
  public List<Long> getOutputs() {
    List<Long> d = new ArrayList<>(4);
    d.add(1L << shift);
    d.add(3L << shift);
    d.add(5L << shift);
    d.add(7L << shift);
    return d;
  }

}
@@ -1,52 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData;

public class TestTimestampDataRepeats implements TestTimestampData {

  private static long t = 1234567890L;

  @Override
  public List<Long> getInputs() {
    List<Long> d = new ArrayList<>(5);
    d.add(t);
    d.add(t);
    d.add(t);
    d.add(t);
    d.add(t);
    return d;
  }

  @Override
  public long getMinimum() {
    return t;
  }

  @Override
  public List<Long> getOutputs() {
    List<Long> d = new ArrayList<>();
    return d;
  }

}
@@ -1,39 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.bytes;

import junit.framework.Assert;

import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category({MiscTests.class, SmallTests.class})
public class TestByteRange {

  @Test
  public void testConstructor() {
    ByteRange b = new SimpleMutableByteRange(new byte[] { 0, 1, 2 });
    Assert.assertEquals(3, b.getLength());
  }

}
@@ -1,32 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.comparator;

import java.util.Comparator;

import org.apache.hadoop.hbase.util.Bytes;

public class ByteArrayComparator implements Comparator<byte[]> {

  @Override
  public int compare(byte[] a, byte[] b) {
    return Bytes.compareTo(a, b);
  }

}
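ByteArrayComparator simply delegates to Bytes.compareTo, which orders arrays lexicographically with each byte treated as unsigned; that is the order HBase keeps row keys in. A brief usage sketch, using the class as it existed before this commit (the sample keys are made up):

import java.util.Arrays;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.comparator.ByteArrayComparator;

public class SortRowKeysDemo {
  public static void main(String[] args) {
    byte[][] rowKeys = {
        Bytes.toBytes("obj3"), Bytes.toBytes("obj2999"), Bytes.toBytes("obj29")
    };
    // Unsigned lexicographic order, i.e. HBase row-key order.
    Arrays.sort(rowKeys, new ByteArrayComparator());
    for (byte[] key : rowKeys) {
      System.out.println(Bytes.toString(key)); // obj29, obj2999, obj3
    }
  }
}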
@@ -1,33 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.number;

import java.text.DecimalFormat;

public class NumberFormatter {

  public static String addCommas(final Number pValue) {
    if (pValue == null) {
      return null;
    }
    String format = "###,###,###,###,###,###,###,###.#####################";
    return new DecimalFormat(format).format(pValue); // biggest is 19 digits
  }

}
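The DecimalFormat pattern groups digits in threes (the grouping size is taken from the last separator in the pattern) and allows a long run of optional fraction digits; the actual grouping character comes from the JVM's default locale. A hypothetical usage, with output shown for an English locale:

import org.apache.hadoop.hbase.util.number.NumberFormatter;

public class NumberFormatterDemo {
  public static void main(String[] args) {
    // Output assumes an English default locale ("," as grouping separator).
    System.out.println(NumberFormatter.addCommas(1234567.89)); // 1,234,567.89
    System.out.println(NumberFormatter.addCommas(null));       // null
  }
}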
@@ -1,34 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.number;

import java.util.Random;

public class RandomNumberUtils {

  public static long nextPositiveLong(Random random) {
    while (true) {
      long value = random.nextLong();
      if (value > 0) {
        return value;
      }
    }
  }

}
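nextPositiveLong rejects non-positive samples in a loop instead of the tempting Math.abs(random.nextLong()). The loop is the safe choice: Math.abs(Long.MIN_VALUE) overflows and returns Long.MIN_VALUE unchanged, so the abs-based version can still yield a negative number. A small demonstration:

import java.util.Random;

public class AbsOverflowDemo {
  public static void main(String[] args) {
    // -Long.MIN_VALUE is not representable, so Math.abs returns the input as-is.
    System.out.println(Math.abs(Long.MIN_VALUE)); // -9223372036854775808
    // Rejection sampling, as in RandomNumberUtils above, avoids the edge case
    // (and also rejects 0, matching the "positive" contract).
    Random random = new Random();
    long value;
    do {
      value = random.nextLong();
    } while (value <= 0);
    System.out.println(value > 0); // true
  }
}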
@@ -1,126 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.vint;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/********************** tests *************************/

@Category({MiscTests.class, SmallTests.class})
public class TestFIntTool {
  @Test
  public void testLeadingZeros() {
    Assert.assertEquals(64, Long.numberOfLeadingZeros(0));
    Assert.assertEquals(63, Long.numberOfLeadingZeros(1));
    Assert.assertEquals(0, Long.numberOfLeadingZeros(Long.MIN_VALUE));
    Assert.assertEquals(0, Long.numberOfLeadingZeros(-1));
    Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE));
    Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE - 1));
  }

  @Test
  public void testMaxValueForNumBytes() {
    Assert.assertEquals(255, UFIntTool.maxValueForNumBytes(1));
    Assert.assertEquals(65535, UFIntTool.maxValueForNumBytes(2));
    Assert.assertEquals(0xffffff, UFIntTool.maxValueForNumBytes(3));
    Assert.assertEquals(0xffffffffffffffL, UFIntTool.maxValueForNumBytes(7));
  }

  @Test
  public void testNumBytes() {
    Assert.assertEquals(1, UFIntTool.numBytes(0));
    Assert.assertEquals(1, UFIntTool.numBytes(1));
    Assert.assertEquals(1, UFIntTool.numBytes(255));
    Assert.assertEquals(2, UFIntTool.numBytes(256));
    Assert.assertEquals(2, UFIntTool.numBytes(65535));
    Assert.assertEquals(3, UFIntTool.numBytes(65536));
    Assert.assertEquals(4, UFIntTool.numBytes(0xffffffffL));
    Assert.assertEquals(5, UFIntTool.numBytes(0x100000000L));
    Assert.assertEquals(4, UFIntTool.numBytes(Integer.MAX_VALUE));
    Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE));
    Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE - 1));
  }

  @Test
  public void testGetBytes() {
    Assert.assertArrayEquals(new byte[] { 0 }, UFIntTool.getBytes(1, 0));
    Assert.assertArrayEquals(new byte[] { 1 }, UFIntTool.getBytes(1, 1));
    Assert.assertArrayEquals(new byte[] { -1 }, UFIntTool.getBytes(1, 255));
    Assert.assertArrayEquals(new byte[] { 1, 0 }, UFIntTool.getBytes(2, 256));
    Assert.assertArrayEquals(new byte[] { 1, 3 }, UFIntTool.getBytes(2, 256 + 3));
    Assert.assertArrayEquals(new byte[] { 1, -128 }, UFIntTool.getBytes(2, 256 + 128));
    Assert.assertArrayEquals(new byte[] { 1, -1 }, UFIntTool.getBytes(2, 256 + 255));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 },
        UFIntTool.getBytes(4, Integer.MAX_VALUE));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 },
        UFIntTool.getBytes(8, Long.MAX_VALUE));
  }

  @Test
  public void testFromBytes() {
    Assert.assertEquals(0, UFIntTool.fromBytes(new byte[] { 0 }));
    Assert.assertEquals(1, UFIntTool.fromBytes(new byte[] { 1 }));
    Assert.assertEquals(255, UFIntTool.fromBytes(new byte[] { -1 }));
    Assert.assertEquals(256, UFIntTool.fromBytes(new byte[] { 1, 0 }));
    Assert.assertEquals(256 + 3, UFIntTool.fromBytes(new byte[] { 1, 3 }));
    Assert.assertEquals(256 + 128, UFIntTool.fromBytes(new byte[] { 1, -128 }));
    Assert.assertEquals(256 + 255, UFIntTool.fromBytes(new byte[] { 1, -1 }));
    Assert.assertEquals(Integer.MAX_VALUE, UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1 }));
    Assert.assertEquals(Long.MAX_VALUE,
        UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }));
  }

  @Test
  public void testRoundTrips() {
    long[] values = new long[] { 0, 1, 2, 255, 256, 31123, 65535, 65536, 65537, 0xfffffeL,
        0xffffffL, 0x1000000L, 0x1000001L, Integer.MAX_VALUE - 1, Integer.MAX_VALUE,
        (long) Integer.MAX_VALUE + 1, Long.MAX_VALUE - 1, Long.MAX_VALUE };
    for (int i = 0; i < values.length; ++i) {
      Assert.assertEquals(values[i], UFIntTool.fromBytes(UFIntTool.getBytes(8, values[i])));
    }
  }

  @Test
  public void testWriteBytes() throws IOException { // copied from testGetBytes
    Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(1, 0));
    Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1, 1));
    Assert.assertArrayEquals(new byte[] { -1 }, bytesViaOutputStream(1, 255));
    Assert.assertArrayEquals(new byte[] { 1, 0 }, bytesViaOutputStream(2, 256));
    Assert.assertArrayEquals(new byte[] { 1, 3 }, bytesViaOutputStream(2, 256 + 3));
    Assert.assertArrayEquals(new byte[] { 1, -128 }, bytesViaOutputStream(2, 256 + 128));
    Assert.assertArrayEquals(new byte[] { 1, -1 }, bytesViaOutputStream(2, 256 + 255));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 },
        bytesViaOutputStream(4, Integer.MAX_VALUE));
    Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 },
        bytesViaOutputStream(8, Long.MAX_VALUE));
  }

  private byte[] bytesViaOutputStream(int outputWidth, long value) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    UFIntTool.writeBytes(outputWidth, value, os);
    return os.toByteArray();
  }
}
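The expectations in TestFIntTool fully determine the encoding: UFIntTool writes an unsigned value big-endian into a caller-chosen fixed number of bytes, and numBytes reports the smallest width that can hold a value. A stand-alone sketch that reproduces the test vectors (illustrative, not the removed implementation):

import java.util.Arrays;

public final class FixedWidthUInt {

  // Big-endian, fixed width: most significant byte first.
  public static byte[] getBytes(int width, long value) {
    byte[] bytes = new byte[width];
    for (int i = width - 1; i >= 0; --i) {
      bytes[i] = (byte) (value & 0xFF);
      value >>>= 8;
    }
    return bytes;
  }

  public static long fromBytes(byte[] bytes) {
    long value = 0;
    for (byte b : bytes) {
      value = (value << 8) | (b & 0xFF); // treat each byte as unsigned
    }
    return value;
  }

  // Smallest width that can hold the value; zero still needs one byte.
  public static int numBytes(long value) {
    if (value == 0) {
      return 1;
    }
    return (64 - Long.numberOfLeadingZeros(value) + 7) / 8;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(getBytes(2, 256))); // [1, 0]
    System.out.println(fromBytes(new byte[] { 1, -1 }));   // 511, i.e. 256 + 255
    System.out.println(numBytes(65536));                   // 3
  }
}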
@@ -1,105 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.vint;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Random;

import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category({MiscTests.class, SmallTests.class})
public class TestVIntTool {

  @Test
  public void testNumBytes() {
    Assert.assertEquals(1, UVIntTool.numBytes(0));
    Assert.assertEquals(1, UVIntTool.numBytes(1));
    Assert.assertEquals(1, UVIntTool.numBytes(100));
    Assert.assertEquals(1, UVIntTool.numBytes(126));
    Assert.assertEquals(1, UVIntTool.numBytes(127));
    Assert.assertEquals(2, UVIntTool.numBytes(128));
    Assert.assertEquals(2, UVIntTool.numBytes(129));
    Assert.assertEquals(5, UVIntTool.numBytes(Integer.MAX_VALUE));
  }

  @Test
  public void testWriteBytes() throws IOException {
    Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(0));
    Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1));
    Assert.assertArrayEquals(new byte[] { 63 }, bytesViaOutputStream(63));
    Assert.assertArrayEquals(new byte[] { 127 }, bytesViaOutputStream(127));
    Assert.assertArrayEquals(new byte[] { -128, 1 }, bytesViaOutputStream(128));
    Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, bytesViaOutputStream(155));
    Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, bytesViaOutputStream(Integer.MAX_VALUE));
  }

  private byte[] bytesViaOutputStream(int value) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    UVIntTool.writeBytes(value, os);
    return os.toByteArray();
  }

  @Test
  public void testToBytes() {
    Assert.assertArrayEquals(new byte[] { 0 }, UVIntTool.getBytes(0));
    Assert.assertArrayEquals(new byte[] { 1 }, UVIntTool.getBytes(1));
    Assert.assertArrayEquals(new byte[] { 63 }, UVIntTool.getBytes(63));
    Assert.assertArrayEquals(new byte[] { 127 }, UVIntTool.getBytes(127));
    Assert.assertArrayEquals(new byte[] { -128, 1 }, UVIntTool.getBytes(128));
    Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVIntTool.getBytes(155));
    Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, UVIntTool.getBytes(Integer.MAX_VALUE));
  }

  @Test
  public void testFromBytes() {
    Assert.assertEquals(Integer.MAX_VALUE,
        UVIntTool.getInt(new SingleByteBuff(ByteBuffer.wrap(UVIntTool.MAX_VALUE_BYTES)), 0));
  }

  @Test
  public void testRoundTrips() {
    Random random = new Random();
    for (int i = 0; i < 10000; ++i) {
      int value = random.nextInt(Integer.MAX_VALUE);
      byte[] bytes = UVIntTool.getBytes(value);
      int roundTripped = UVIntTool.getInt(new SingleByteBuff(ByteBuffer.wrap(bytes)), 0);
      Assert.assertEquals(value, roundTripped);
    }
  }

  @Test
  public void testInputStreams() throws IOException {
    ByteArrayInputStream is;
    is = new ByteArrayInputStream(new byte[] { 0 });
    Assert.assertEquals(0, UVIntTool.getInt(is));
    is = new ByteArrayInputStream(new byte[] { 5 });
    Assert.assertEquals(5, UVIntTool.getInt(is));
    is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
    Assert.assertEquals(155, UVIntTool.getInt(is));
  }

}
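The UVIntTool vectors describe a standard LEB128-style unsigned varint: seven payload bits per byte, least-significant group first, with the high bit set on every byte except the last. That is why 127 fits in one byte while 128 becomes {0x80, 0x01}, printed as {-128, 1} in signed Java bytes. A self-contained sketch matching those vectors (illustrative; the removed tool also had stream and ByteBuff entry points):

import java.io.ByteArrayOutputStream;
import java.util.Arrays;

public final class UVarInt {

  // Encode: 7 bits per byte, least-significant group first;
  // the high bit marks "more bytes follow".
  public static byte[] getBytes(int value) {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    while ((value & ~0x7F) != 0) {
      os.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    os.write(value);
    return os.toByteArray();
  }

  // Decode starting at the given offset in a larger buffer.
  public static int getInt(byte[] bytes, int offset) {
    int value = 0;
    for (int shift = 0;; shift += 7) {
      byte b = bytes[offset++];
      value |= (b & 0x7F) << shift;
      if ((b & 0x80) == 0) {
        return value;
      }
    }
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(getBytes(127))); // [127]
    System.out.println(Arrays.toString(getBytes(128))); // [-128, 1]
    System.out.println(Arrays.toString(getBytes(155))); // [-101, 1], i.e. {-128 + 27, 1}
    System.out.println(getInt(getBytes(Integer.MAX_VALUE), 0) == Integer.MAX_VALUE); // true
  }
}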
@@ -1,113 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.vint;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Random;

import org.apache.hadoop.hbase.nio.SingleByteBuff;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.number.RandomNumberUtils;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category({MiscTests.class, SmallTests.class})
public class TestVLongTool {

  @Test
  public void testNumBytes() {
    Assert.assertEquals(1, UVLongTool.numBytes(0));
    Assert.assertEquals(1, UVLongTool.numBytes(1));
    Assert.assertEquals(1, UVLongTool.numBytes(100));
    Assert.assertEquals(1, UVLongTool.numBytes(126));
    Assert.assertEquals(1, UVLongTool.numBytes(127));
    Assert.assertEquals(2, UVLongTool.numBytes(128));
    Assert.assertEquals(2, UVLongTool.numBytes(129));
    Assert.assertEquals(9, UVLongTool.numBytes(Long.MAX_VALUE));
  }

  @Test
  public void testToBytes() {
    Assert.assertArrayEquals(new byte[] { 0 }, UVLongTool.getBytes(0));
    Assert.assertArrayEquals(new byte[] { 1 }, UVLongTool.getBytes(1));
    Assert.assertArrayEquals(new byte[] { 63 }, UVLongTool.getBytes(63));
    Assert.assertArrayEquals(new byte[] { 127 }, UVLongTool.getBytes(127));
    Assert.assertArrayEquals(new byte[] { -128, 1 }, UVLongTool.getBytes(128));
    Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVLongTool.getBytes(155));
    Assert.assertArrayEquals(UVLongTool.MAX_VALUE_BYTES, UVLongTool.getBytes(Long.MAX_VALUE));
  }

  @Test
  public void testFromBytes() {
    Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES));
  }

  @Test
  public void testFromBytesOffset() {
    Assert.assertEquals(Long.MAX_VALUE,
        UVLongTool.getLong(new SingleByteBuff(ByteBuffer.wrap(UVLongTool.MAX_VALUE_BYTES)), 0));

    long ms = 1318966363481L;
    // System.out.println(ms);
    byte[] bytes = UVLongTool.getBytes(ms);
    // System.out.println(Arrays.toString(bytes));
    long roundTripped = UVLongTool.getLong(new SingleByteBuff(ByteBuffer.wrap(bytes)), 0);
    Assert.assertEquals(ms, roundTripped);

    int calculatedNumBytes = UVLongTool.numBytes(ms);
    int actualNumBytes = bytes.length;
    Assert.assertEquals(actualNumBytes, calculatedNumBytes);

    byte[] shiftedBytes = new byte[1000];
    int shift = 33;
    System.arraycopy(bytes, 0, shiftedBytes, shift, bytes.length);
    long shiftedRoundTrip =
        UVLongTool.getLong(new SingleByteBuff(ByteBuffer.wrap(shiftedBytes)), shift);
    Assert.assertEquals(ms, shiftedRoundTrip);
  }

  @Test
  public void testRoundTrips() {
    Random random = new Random();
    for (int i = 0; i < 10000; ++i) {
      long value = RandomNumberUtils.nextPositiveLong(random);
      byte[] bytes = UVLongTool.getBytes(value);
      long roundTripped = UVLongTool.getLong(bytes);
      Assert.assertEquals(value, roundTripped);
      int calculatedNumBytes = UVLongTool.numBytes(value);
      int actualNumBytes = bytes.length;
      Assert.assertEquals(actualNumBytes, calculatedNumBytes);
    }
  }

  @Test
  public void testInputStreams() throws IOException {
    ByteArrayInputStream is;
    is = new ByteArrayInputStream(new byte[] { 0 });
    Assert.assertEquals(0, UVLongTool.getLong(is));
    is = new ByteArrayInputStream(new byte[] { 5 });
    Assert.assertEquals(5, UVLongTool.getLong(is));
    is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 });
    Assert.assertEquals(155, UVLongTool.getLong(is));
  }
}
@@ -1,68 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Define some default values that can be overridden by system properties
hbase.root.logger=INFO,console
hbase.log.dir=.
hbase.log.file=hbase.log

# Define the root logger to the system property "hbase.root.logger".
log4j.rootLogger=${hbase.root.logger}

# Logging Threshold
log4j.threshold=ALL

#
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file}

# Rollover at midnight
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd

# 30-day backup
#log4j.appender.DRFA.MaxBackupIndex=30
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Debugging Pattern format
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n


#
# console
# Add "console" to rootLogger above if you want to use this
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): %m%n

# Custom Logging levels

#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG

log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.apache.zookeeper=ERROR
log4j.logger.org.apache.hadoop.hbase=DEBUG

# These settings are workarounds against spurious logs from the minicluster.
# See HBASE-4709
log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN
log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSinkAdapter=WARN
log4j.logger.org.apache.hadoop.metrics2.impl.MetricsSystemImpl=WARN
log4j.logger.org.apache.hadoop.metrics2.util.MBeans=WARN
# Enable this to get detailed connection error/retry logging.
# log4j.logger.org.apache.hadoop.hbase.client.ConnectionImplementation=TRACE
@@ -392,11 +392,6 @@
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-replication</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-prefix-tree</artifactId>
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
@@ -792,11 +792,7 @@ public class HFileWriterImpl implements HFile.Writer {
    int avgValueLen =
        entryCount == 0 ? 0 : (int) (totalValueLength / entryCount);
    fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false);
    if (hFileContext.getDataBlockEncoding() == DataBlockEncoding.PREFIX_TREE) {
      // In case of Prefix Tree encoding, we always write tags information into HFiles even if
      // none of the KVs have tags.
      fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false);
    } else if (hFileContext.isIncludesTags()) {
    if (hFileContext.isIncludesTags()) {
      // When tags are not being written in this file, MAX_TAGS_LEN is excluded
      // from the FileInfo
      fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false);
@@ -85,7 +85,6 @@ import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
@@ -218,7 +217,6 @@ public class TestFromClientSide {
    final byte[] T3 = Bytes.toBytes("T3");
    HColumnDescriptor hcd = new HColumnDescriptor(FAMILY)
        .setKeepDeletedCells(KeepDeletedCells.TRUE)
        .setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE)
        .setMaxVersions(3);

    HTableDescriptor desc = new HTableDescriptor(tableName);
@@ -42,9 +42,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeSeeker;
import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
@@ -194,9 +192,6 @@ public class TestDataBlockEncoders {
    List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<>();
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      LOG.info("Encoding: " + encoding);
      // Off heap block data support not added for PREFIX_TREE DBE yet.
      // TODO remove this once support is added. HBASE-12298
      if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
      DataBlockEncoder encoder = encoding.getEncoder();
      if (encoder == null) {
        continue;
@@ -271,9 +266,6 @@ public class TestDataBlockEncoders {
    List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);

    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      // Off heap block data support not added for PREFIX_TREE DBE yet.
      // TODO remove this once support is added. HBASE-12298
      if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
      if (encoding.getEncoder() == null) {
        continue;
      }
@@ -317,9 +309,6 @@ public class TestDataBlockEncoders {
    List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);

    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      // Off heap block data support not added for PREFIX_TREE DBE yet.
      // TODO remove this once support is added. HBASE-12298
      if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue;
      if (encoding.getEncoder() == null) {
        continue;
      }
@@ -346,12 +335,7 @@ public class TestDataBlockEncoders {

      Cell actualKeyValue = seeker.getCell();
      ByteBuffer actualKey = null;
      if (seeker instanceof PrefixTreeSeeker) {
        byte[] serializedKey = PrivateCellUtil.getCellKeySerializedAsKeyValueKey(seeker.getKey());
        actualKey = ByteBuffer.wrap(KeyValueUtil.createKeyValueFromKey(serializedKey).getKey());
      } else {
        actualKey = ByteBuffer.wrap(((KeyValue) seeker.getKey()).getKey());
      }
      actualKey = ByteBuffer.wrap(((KeyValue) seeker.getKey()).getKey());
      ByteBuffer actualValue = seeker.getValueShallowCopy();

      if (expectedKeyValue != null) {
@@ -1,192 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.encoding;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category({ IOTests.class, SmallTests.class })
public class TestPrefixTree {

  private static final String row4 = "a-b-B-2-1402397300-1402416535";
  private static final byte[] row4_bytes = Bytes.toBytes(row4);
  private static final String row3 = "a-b-A-1-1402397227-1402415999";
  private static final byte[] row3_bytes = Bytes.toBytes(row3);
  private static final String row2 = "a-b-A-1-1402329600-1402396277";
  private static final byte[] row2_bytes = Bytes.toBytes(row2);
  private static final String row1 = "a-b-A-1";
  private static final byte[] row1_bytes = Bytes.toBytes(row1);

  private final static byte[] fam = Bytes.toBytes("cf_1");
  private final static byte[] qual1 = Bytes.toBytes("qf_1");
  private final static byte[] qual2 = Bytes.toBytes("qf_2");

  private final HBaseTestingUtility testUtil = new HBaseTestingUtility();

  private HRegion region;

  @Before
  public void setUp() throws Exception {
    TableName tableName = TableName.valueOf(getClass().getSimpleName());
    HTableDescriptor htd = new HTableDescriptor(tableName);
    htd.addFamily(new HColumnDescriptor(fam).setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE));
    HRegionInfo info = new HRegionInfo(tableName, null, null, false);
    Path path = testUtil.getDataTestDir(getClass().getSimpleName());
    region = HBaseTestingUtility.createRegionAndWAL(info, path, testUtil.getConfiguration(), htd);
  }

  @After
  public void tearDown() throws Exception {
    HBaseTestingUtility.closeRegionAndWAL(region);
    testUtil.cleanupTestDir();
  }

  @Test
  public void testHBASE11728() throws Exception {
    Put put = new Put(Bytes.toBytes("a-b-0-0"));
    put.addColumn(fam, qual1, Bytes.toBytes("c1-value"));
    region.put(put);
    put = new Put(row1_bytes);
    put.addColumn(fam, qual1, Bytes.toBytes("c1-value"));
    region.put(put);
    put = new Put(row2_bytes);
    put.addColumn(fam, qual2, Bytes.toBytes("c2-value"));
    region.put(put);
    put = new Put(row3_bytes);
    put.addColumn(fam, qual2, Bytes.toBytes("c2-value-2"));
    region.put(put);
    put = new Put(row4_bytes);
    put.addColumn(fam, qual2, Bytes.toBytes("c2-value-3"));
    region.put(put);
    region.flush(true);
    String[] rows = new String[3];
    rows[0] = row1;
    rows[1] = row2;
    rows[2] = row3;
    byte[][] val = new byte[3][];
    val[0] = Bytes.toBytes("c1-value");
    val[1] = Bytes.toBytes("c2-value");
    val[2] = Bytes.toBytes("c2-value-2");
    Scan scan = new Scan();
    scan.setStartRow(row1_bytes);
    scan.setStopRow(Bytes.toBytes("a-b-A-1:"));

    RegionScanner scanner = region.getScanner(scan);
    List<Cell> cells = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
      assertEquals(i < 2, scanner.next(cells));
      CellScanner cellScanner = Result.create(cells).cellScanner();
      while (cellScanner.advance()) {
        assertEquals(rows[i], Bytes.toString(cellScanner.current().getRowArray(), cellScanner
            .current().getRowOffset(), cellScanner.current().getRowLength()));
        assertEquals(Bytes.toString(val[i]), Bytes.toString(cellScanner.current().getValueArray(),
            cellScanner.current().getValueOffset(), cellScanner.current().getValueLength()));
      }
      cells.clear();
    }
    scanner.close();

    // Add column
    scan = new Scan();
    scan.addColumn(fam, qual2);
    scan.setStartRow(row1_bytes);
    scan.setStopRow(Bytes.toBytes("a-b-A-1:"));
    scanner = region.getScanner(scan);
    for (int i = 1; i < 3; i++) {
      assertEquals(i < 2, scanner.next(cells));
      CellScanner cellScanner = Result.create(cells).cellScanner();
      while (cellScanner.advance()) {
        assertEquals(rows[i], Bytes.toString(cellScanner.current().getRowArray(), cellScanner
            .current().getRowOffset(), cellScanner.current().getRowLength()));
      }
      cells.clear();
    }
    scanner.close();

    scan = new Scan();
    scan.addColumn(fam, qual2);
    scan.setStartRow(Bytes.toBytes("a-b-A-1-"));
    scan.setStopRow(Bytes.toBytes("a-b-A-1:"));
    scanner = region.getScanner(scan);
    for (int i = 1; i < 3; i++) {
      assertEquals(i < 2, scanner.next(cells));
      CellScanner cellScanner = Result.create(cells).cellScanner();
      while (cellScanner.advance()) {
        assertEquals(rows[i], Bytes.toString(cellScanner.current().getRowArray(), cellScanner
            .current().getRowOffset(), cellScanner.current().getRowLength()));
      }
      cells.clear();
    }
    scanner.close();

    scan = new Scan();
    scan.addColumn(fam, qual2);
    scan.setStartRow(Bytes.toBytes("a-b-A-1-140239"));
    scan.setStopRow(Bytes.toBytes("a-b-A-1:"));
    scanner = region.getScanner(scan);
    assertFalse(scanner.next(cells));
    assertFalse(cells.isEmpty());
    scanner.close();
  }

  @Test
  public void testHBASE12817() throws IOException {
    for (int i = 0; i < 100; i++) {
      region
          .put(new Put(Bytes.toBytes("obj" + (2900 + i))).addColumn(fam, qual1, Bytes.toBytes(i)));
    }
    region.put(new Put(Bytes.toBytes("obj299")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
    region.put(new Put(Bytes.toBytes("obj29")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
    region.put(new Put(Bytes.toBytes("obj2")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
    region.put(new Put(Bytes.toBytes("obj3")).addColumn(fam, qual1, Bytes.toBytes("whatever")));
    region.flush(true);
    Scan scan = new Scan(Bytes.toBytes("obj29995"));
    RegionScanner scanner = region.getScanner(scan);
    List<Cell> cells = new ArrayList<>();
    assertFalse(scanner.next(cells));
    assertArrayEquals(Bytes.toBytes("obj3"), Result.create(cells).getRow());
  }
}
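testHBASE12817 is worth a gloss: none of the stored rows equals the scan start key "obj29995", so a correct seek must position on the next row in lexicographic order, which is "obj3" ("obj2999" < "obj29995" < "obj3"). The ordering argument can be mimicked with an ordered set; a sketch (sample data copied from the test, the TreeSet stands in for the region):

import java.util.TreeSet;

public class SeekOrderDemo {
  public static void main(String[] args) {
    TreeSet<String> rows = new TreeSet<>();
    for (int i = 0; i < 100; i++) {
      rows.add("obj" + (2900 + i)); // obj2900 .. obj2999
    }
    rows.add("obj299");
    rows.add("obj29");
    rows.add("obj2");
    rows.add("obj3");
    // First row >= the scan start key -- the row the PREFIX_TREE seek
    // had to find after the HBASE-12817 fix.
    System.out.println(rows.ceiling("obj29995")); // obj3
  }
}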
@ -1,338 +0,0 @@
|
|||
/**
|
||||
* Copyright The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.io.encoding;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ConcurrentSkipListSet;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.CellComparatorImpl;
|
||||
import org.apache.hadoop.hbase.CellUtil;
|
||||
import org.apache.hadoop.hbase.PrivateCellUtil;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||
import org.apache.hadoop.hbase.Tag;
|
||||
import org.apache.hadoop.hbase.ArrayBackedTag;
|
||||
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec;
|
||||
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
|
||||
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
|
||||
import org.apache.hadoop.hbase.io.hfile.HFileContext;
|
||||
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
|
||||
import org.apache.hadoop.hbase.nio.SingleByteBuff;
|
||||
import org.apache.hadoop.hbase.testclassification.IOTests;
|
||||
import org.apache.hadoop.hbase.testclassification.SmallTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.CollectionBackedScanner;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.junit.runners.Parameterized.Parameters;
|
||||
|
||||
/**
 * Tests scanning/seeking data with PrefixTree Encoding.
 */
@RunWith(Parameterized.class)
@Category({IOTests.class, SmallTests.class})
public class TestPrefixTreeEncoding {
  private static final Log LOG = LogFactory.getLog(TestPrefixTreeEncoding.class);
  private static final String CF = "EncodingTestCF";
  private static final byte[] CF_BYTES = Bytes.toBytes(CF);
  private static final int NUM_ROWS_PER_BATCH = 50;
  private static final int NUM_COLS_PER_ROW = 20;

  private int numBatchesWritten = 0;
  private ConcurrentSkipListSet<Cell> kvset =
      new ConcurrentSkipListSet<>(CellComparatorImpl.COMPARATOR);

  private static boolean formatRowNum = false;

  @Parameters
  public static Collection<Object[]> parameters() {
    List<Object[]> paramList = new ArrayList<>();
    {
      paramList.add(new Object[] { false });
      paramList.add(new Object[] { true });
    }
    return paramList;
  }

  private final boolean includesTag;

  public TestPrefixTreeEncoding(boolean includesTag) {
    this.includesTag = includesTag;
  }

  @Before
  public void setUp() throws Exception {
    kvset.clear();
    formatRowNum = false;
  }
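  // seekBefore semantics exercised below: seeking before the first cell of the
  // block must leave the seeker on nothing (getCell() == null); seeking before
  // any later row must land on the last cell of the preceding row.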
  @Test
  public void testSeekBeforeWithFixedData() throws Exception {
    formatRowNum = true;
    PrefixTreeCodec encoder = new PrefixTreeCodec();
    int batchId = numBatchesWritten++;
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(false)
        .withIncludesMvcc(false)
        .withIncludesTags(includesTag)
        .withCompression(Algorithm.NONE).build();
    HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
        DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
    ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
    DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
    generateFixedTestData(kvset, batchId, false, includesTag, encoder, blkEncodingCtx,
        userDataStream);
    EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
        encoder.newDataBlockDecodingContext(meta));
    byte[] onDiskBytes = baosInMemory.toByteArray();
    ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
        onDiskBytes.length - DataBlockEncoding.ID_SIZE);
    seeker.setCurrentBuffer(new SingleByteBuff(readBuffer));

    // Seek before the first keyvalue;
    Cell seekKey =
        PrivateCellUtil.createFirstDeleteFamilyCellOnRow(getRowKey(batchId, 0), CF_BYTES);
    seeker.seekToKeyInBlock(seekKey, true);
    assertEquals(null, seeker.getCell());

    // Seek before the middle keyvalue;
    seekKey = PrivateCellUtil
        .createFirstDeleteFamilyCellOnRow(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), CF_BYTES);
    seeker.seekToKeyInBlock(seekKey, true);
    assertNotNull(seeker.getCell());
    assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1),
        CellUtil.cloneRow(seeker.getCell()));

    // Seek before the last keyvalue;
    seekKey = PrivateCellUtil.createFirstDeleteFamilyCellOnRow(Bytes.toBytes("zzzz"), CF_BYTES);
    seeker.seekToKeyInBlock(seekKey, true);
    assertNotNull(seeker.getCell());
    assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1),
        CellUtil.cloneRow(seeker.getCell()));
  }
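  // Full-block scan check: cells must come back in comparator order, and tags
  // must be present exactly when the block was written with includesTag set.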
  @Test
  public void testScanWithRandomData() throws Exception {
    PrefixTreeCodec encoder = new PrefixTreeCodec();
    ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
    DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(false)
        .withIncludesMvcc(false)
        .withIncludesTags(includesTag)
        .withCompression(Algorithm.NONE)
        .build();
    HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
        DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
    generateRandomTestData(kvset, numBatchesWritten++, includesTag, encoder, blkEncodingCtx,
        userDataStream);
    EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
        encoder.newDataBlockDecodingContext(meta));
    byte[] onDiskBytes = baosInMemory.toByteArray();
    ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
        onDiskBytes.length - DataBlockEncoding.ID_SIZE);
    seeker.setCurrentBuffer(new SingleByteBuff(readBuffer));
    Cell previousKV = null;
    do {
      Cell currentKV = seeker.getCell();
      System.out.println(currentKV);
      if (previousKV != null && CellComparatorImpl.COMPARATOR.compare(currentKV, previousKV) < 0) {
        dumpInputKVSet();
        fail("Current kv " + currentKV + " is smaller than previous keyvalue " + previousKV);
      }
      if (!includesTag) {
        assertFalse(currentKV.getTagsLength() > 0);
      } else {
        Assert.assertTrue(currentKV.getTagsLength() > 0);
      }
      previousKV = currentKV;
    } while (seeker.next());
  }
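  // The next two tests encode one batch (random, then fixed) and delegate the
  // per-row verification to verifySeeking() below.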
  @Test
  public void testSeekWithRandomData() throws Exception {
    PrefixTreeCodec encoder = new PrefixTreeCodec();
    ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
    DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
    int batchId = numBatchesWritten++;
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(false)
        .withIncludesMvcc(false)
        .withIncludesTags(includesTag)
        .withCompression(Algorithm.NONE)
        .build();
    HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
        DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
    generateRandomTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream);
    EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
        encoder.newDataBlockDecodingContext(meta));
    byte[] onDiskBytes = baosInMemory.toByteArray();
    ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
        onDiskBytes.length - DataBlockEncoding.ID_SIZE);
    verifySeeking(seeker, readBuffer, batchId);
  }
  @Test
  public void testSeekWithFixedData() throws Exception {
    PrefixTreeCodec encoder = new PrefixTreeCodec();
    int batchId = numBatchesWritten++;
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(false)
        .withIncludesMvcc(false)
        .withIncludesTags(includesTag)
        .withCompression(Algorithm.NONE)
        .build();
    HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
        DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
    ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
    DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
    generateFixedTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream);
    EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
        encoder.newDataBlockDecodingContext(meta));
    byte[] onDiskBytes = baosInMemory.toByteArray();
    ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
        onDiskBytes.length - DataBlockEncoding.ID_SIZE);
    verifySeeking(seeker, readBuffer, batchId);
  }
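  // Cross-check: for each row's first key, the encoded seeker and a plain
  // CollectionBackedScanner over the same cells must agree on hasMore and on
  // the exact cell found.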
  private void verifySeeking(EncodedSeeker encodeSeeker,
      ByteBuffer encodedData, int batchId) {
    List<KeyValue> kvList = new ArrayList<>();
    for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
      kvList.clear();
      encodeSeeker.setCurrentBuffer(new SingleByteBuff(encodedData));
      KeyValue firstOnRow = KeyValueUtil.createFirstOnRow(getRowKey(batchId, i));
      encodeSeeker.seekToKeyInBlock(
          new KeyValue.KeyOnlyKeyValue(firstOnRow.getBuffer(), firstOnRow.getKeyOffset(),
              firstOnRow.getKeyLength()), false);
      boolean hasMoreOfEncodeScanner = encodeSeeker.next();
      CollectionBackedScanner collectionScanner = new CollectionBackedScanner(
          this.kvset);
      boolean hasMoreOfCollectionScanner = collectionScanner.seek(firstOnRow);
      if (hasMoreOfEncodeScanner != hasMoreOfCollectionScanner) {
        dumpInputKVSet();
        fail("Get error result after seeking " + firstOnRow);
      }
      if (hasMoreOfEncodeScanner) {
        if (CellComparatorImpl.COMPARATOR.compare(encodeSeeker.getCell(),
            collectionScanner.peek()) != 0) {
          dumpInputKVSet();
          fail("Expected " + collectionScanner.peek() + " actual "
              + encodeSeeker.getCell() + ", after seeking " + firstOnRow);
        }
      }
    }
  }

  private void dumpInputKVSet() {
    LOG.info("Dumping input keyvalue set in error case:");
    for (Cell kv : kvset) {
      System.out.println(kv);
    }
  }
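  // Test-data generators: populate the shared kvset (kept sorted by the
  // comparator), then push every cell through the codec's
  // startBlockEncoding/encode/endBlockEncoding cycle.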
  private static void generateFixedTestData(ConcurrentSkipListSet<Cell> kvset, int batchId,
      boolean useTags, PrefixTreeCodec encoder, HFileBlockEncodingContext blkEncodingCtx,
      DataOutputStream userDataStream) throws Exception {
    generateFixedTestData(kvset, batchId, true, useTags, encoder, blkEncodingCtx, userDataStream);
  }

  private static void generateFixedTestData(ConcurrentSkipListSet<Cell> kvset,
      int batchId, boolean partial, boolean useTags, PrefixTreeCodec encoder,
      HFileBlockEncodingContext blkEncodingCtx, DataOutputStream userDataStream) throws Exception {
    for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
      if (partial && i / 10 % 2 == 1)
        continue;
      for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
        if (!useTags) {
          KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue(
              batchId, i, j));
          kvset.add(kv);
        } else {
          KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l,
              getValue(batchId, i, j), new Tag[] { new ArrayBackedTag((byte) 1, "metaValue1") });
          kvset.add(kv);
        }
      }
    }
    encoder.startBlockEncoding(blkEncodingCtx, userDataStream);
    for (Cell kv : kvset) {
      encoder.encode(kv, blkEncodingCtx, userDataStream);
    }
    encoder.endBlockEncoding(blkEncodingCtx, userDataStream, null);
  }
  private static void generateRandomTestData(ConcurrentSkipListSet<Cell> kvset,
      int batchId, boolean useTags, PrefixTreeCodec encoder,
      HFileBlockEncodingContext blkEncodingCtx, DataOutputStream userDataStream) throws Exception {
    Random random = new Random();
    for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
      if (random.nextInt(100) < 50)
        continue;
      for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
        if (random.nextInt(100) < 50)
          continue;
        if (!useTags) {
          KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue(
              batchId, i, j));
          kvset.add(kv);
        } else {
          KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l,
              getValue(batchId, i, j), new Tag[] { new ArrayBackedTag((byte) 1, "metaValue1") });
          kvset.add(kv);
        }
      }
    }
    encoder.startBlockEncoding(blkEncodingCtx, userDataStream);
    for (Cell kv : kvset) {
      encoder.encode(kv, blkEncodingCtx, userDataStream);
    }
    encoder.endBlockEncoding(blkEncodingCtx, userDataStream, null);
  }

  private static byte[] getRowKey(int batchId, int i) {
    return Bytes
        .toBytes("batch" + batchId + "_row" + (formatRowNum ? String.format("%04d", i) : i));
  }

  private static byte[] getQualifier(int j) {
    return Bytes.toBytes("colfdfafhfhsdfhsdfh" + j);
  }

  private static byte[] getValue(int batchId, int i, int j) {
    return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) + "_col" + j);
  }

}
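For orientation: apart from the PrefixTreeCodec constructor, nothing in the deleted test is prefix-tree specific; the rest is the generic DataBlockEncoder block cycle. A minimal sketch of that same cycle against an encoding that survives this commit (FAST_DIFF here; the method signatures are the ones used in the file above, and kvs is a stand-in for any comparator-sorted cell collection):

    // Sketch only: generic encode-then-seek cycle, minus PrefixTreeCodec.
    // Assumes the 2.0-era DataBlockEncoder API shown above; 'kvs' is hypothetical input.
    DataBlockEncoding encoding = DataBlockEncoding.FAST_DIFF;
    DataBlockEncoder encoder = encoding.getEncoder();
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(false)
        .withIncludesMvcc(false)
        .withIncludesTags(false)
        .withCompression(Algorithm.NONE).build();
    HFileBlockEncodingContext ctx =
        new HFileBlockDefaultEncodingContext(encoding, new byte[0], meta);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    encoder.startBlockEncoding(ctx, out);
    for (Cell kv : kvs) {                     // cells must already be in comparator order
      encoder.encode(kv, ctx, out);
    }
    encoder.endBlockEncoding(ctx, out, null);
    byte[] onDisk = baos.toByteArray();
    // The first ID_SIZE bytes carry the encoding id; the seeker wants only the payload.
    EncodedSeeker seeker = encoder.createSeeker(CellComparatorImpl.COMPARATOR,
        encoder.newDataBlockDecodingContext(meta));
    seeker.setCurrentBuffer(new SingleByteBuff(ByteBuffer.wrap(onDisk,
        DataBlockEncoding.ID_SIZE, onDisk.length - DataBlockEncoding.ID_SIZE)));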
@@ -272,10 +272,9 @@ public class TestSeekToBlockWithEncoders {
    // create all seekers
    List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<>();
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
      if (encoding.getEncoder() == null || encoding == DataBlockEncoding.PREFIX_TREE) {
      if (encoding.getEncoder() == null) {
        continue;
      }

      DataBlockEncoder encoder = encoding.getEncoder();
      HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(false)
          .withIncludesMvcc(false).withIncludesTags(false)
@@ -149,10 +149,6 @@ public class TestHFileDataBlockEncoder {
  @Test
  public void testEncodingWithOffheapKeyValue() throws IOException {
    // usually we have just block without headers, but don't complicate that
    if (blockEncoder.getDataBlockEncoding() == DataBlockEncoding.PREFIX_TREE) {
      // This is a TODO: Only after PrefixTree is fixed we can remove this check
      return;
    }
    try {
      List<Cell> kvs = generator.generateTestExtendedOffheapKeyValues(60, true);
      HFileContext meta = new HFileContextBuilder().withIncludesMvcc(includesMemstoreTS)
@@ -316,12 +316,7 @@ public class TestSeekTo {
    assertEquals("i", toRowStr(scanner.getCell()));

    assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
    if (encoding == DataBlockEncoding.PREFIX_TREE) {
      // TODO : Fix this
      assertEquals(null, scanner.getCell());
    } else {
      assertEquals("k", toRowStr(scanner.getCell()));
    }
    assertEquals("k", toRowStr(scanner.getCell()));

    reader.close();
    deleteTestDir(fs);
@@ -116,8 +116,7 @@ public class TestTags {
    HTableDescriptor desc = new HTableDescriptor(tableName);
    HColumnDescriptor colDesc = new HColumnDescriptor(fam);
    colDesc.setBlockCacheEnabled(true);
    // colDesc.setDataBlockEncoding(DataBlockEncoding.NONE);
    colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
    colDesc.setDataBlockEncoding(DataBlockEncoding.NONE);
    desc.addFamily(colDesc);
    Admin admin = TEST_UTIL.getAdmin();
    admin.createTable(desc);
@@ -183,7 +182,7 @@ public class TestTags {
    HColumnDescriptor colDesc = new HColumnDescriptor(fam);
    colDesc.setBlockCacheEnabled(true);
    // colDesc.setDataBlockEncoding(DataBlockEncoding.NONE);
    colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
    // colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
    desc.addFamily(colDesc);
    Admin admin = TEST_UTIL.getAdmin();
    admin.createTable(desc);
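Because PREFIX_TREE data can no longer be decoded once the module is gone, any column family still carrying the encoding has to be switched to another encoding and its hfiles rewritten before upgrading past this commit. A hedged sketch using the same pre-2.0 admin API these tests use (tableName, fam, and admin are placeholders, and the exact alter call varies by version — modifyColumnFamily in newer Admin APIs):

    // Migration sketch, assuming a 1.x-era Admin; not taken from this commit.
    HColumnDescriptor colDesc = new HColumnDescriptor(fam);
    colDesc.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF); // any surviving encoding
    admin.modifyColumn(tableName, colDesc);  // assumption: 1.x signature
    admin.majorCompact(tableName);           // rewrite existing PREFIX_TREE blocks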
Some files were not shown because too many files have changed in this diff.