HBASE-13450 - Purge RawBytesComparator from the writers and readers for

HBASE-10800 (Ram)
This commit is contained in:
ramkrishna 2015-04-24 16:01:38 +05:30
parent e1cb405adc
commit a4fa930843
9 changed files with 127 additions and 65 deletions

View File

@ -2055,6 +2055,23 @@ public class Bytes implements Comparable<Bytes> {
return result;
}
/**
 * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR
 * (raw lexicographic byte comparison, no KeyValue-specific ordering).
 *
 * @param arr array of byte arrays to search for
 * @param key the key you want to find
 * @param offset the offset in the key you want to find
 * @param length the length of the key
 * @return zero-based index of the key, if the key is present in the array.
 * Otherwise, a value -(i + 1) such that the key is between arr[i -
 * 1] and arr[i] non-inclusively, where i is in [0, N], if we define
 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
 * means that this function can return 2N + 1 different values
 * ranging from -(N + 1) to N - 1.
 */
public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
// Delegates to the comparator-taking overload with the raw byte comparator.
return binarySearch(arr, key, offset, length, Bytes.BYTES_RAWCOMPARATOR);
}
/**
* Binary search for keys in indexes.
*
@ -2069,9 +2086,14 @@ public class Bytes implements Comparable<Bytes> {
* arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
* means that this function can return 2N + 1 different values
* ranging from -(N + 1) to N - 1.
* @deprecated {@link Bytes#binarySearch(byte[][], byte[], int, int)}
*/
@Deprecated
public static int binarySearch(byte [][]arr, byte []key, int offset,
int length, RawComparator<?> comparator) {
if(comparator == null) {
comparator = Bytes.BYTES_RAWCOMPARATOR;
}
int low = 0;
int high = arr.length - 1;
@ -2107,7 +2129,9 @@ public class Bytes implements Comparable<Bytes> {
* means that this function can return 2N + 1 different values
* ranging from -(N + 1) to N - 1.
* @return the index of the block
* @deprecated Use {@link Bytes#binarySearch(byte[][], Cell, Comparator)}
*/
@Deprecated
public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) {
int low = 0;
int high = arr.length - 1;
@ -2131,6 +2155,43 @@ public class Bytes implements Comparable<Bytes> {
return - (low+1);
}
/**
 * Binary search for keys in indexes, comparing the probe {@code key} against
 * each index entry after wrapping the entry in a
 * {@link KeyValue.KeyOnlyKeyValue}.
 *
 * @param arr array of byte arrays to search for; each entry is a serialized
 *        flat key that the comparator can interpret via KeyOnlyKeyValue
 * @param key the key you want to find
 * @param comparator a comparator to compare.
 * @return zero-based index of the key, if the key is present in the array.
 *         Otherwise, a value -(i + 1) such that the key is between arr[i -
 *         1] and arr[i] non-inclusively, where i is in [0, N], if we define
 *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
 *         means that this function can return 2N + 1 different values
 *         ranging from -(N + 1) to N - 1.
 */
public static int binarySearch(byte[][] arr, Cell key, Comparator<Cell> comparator) {
  int low = 0;
  int high = arr.length - 1;
  // Reused wrapper: avoids allocating a fresh KeyValue per probe.
  KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
  while (low <= high) {
    // Unsigned shift keeps the midpoint correct even if (low + high) overflows.
    int mid = (low + high) >>> 1;
    // we have to compare in this order, because the comparator order
    // has special logic when the 'left side' is a special key.
    r.setKey(arr[mid], 0, arr[mid].length);
    int cmp = comparator.compare(key, r);
    if (cmp > 0) {
      // key lives above the midpoint
      low = mid + 1;
    } else if (cmp < 0) {
      // key lives below the midpoint
      high = mid - 1;
    } else {
      // exact match
      return mid;
    }
  }
  // Not found: encode the insertion point as -(insertionPoint + 1).
  return -(low + 1);
}
/**
* Bytewise binary increment/deincrement of long contained in byte array
* on given amount.

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.Bytes;
/**
* The {@link HFile} has a fixed trailer which contains offsets to other
* variable parts of the file. Also includes basic metadata on this file. The
@ -107,7 +108,8 @@ public class FixedFileTrailer {
private long lastDataBlockOffset;
/** Raw key comparator class name in version 3 */
private String comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName();
// We could write the actual class name from 2.0 onwards and handle BC
private String comparatorClassName = KeyValue.COMPARATOR.getClass().getName();
/** The encryption key */
private byte[] encryptionKey;
@ -200,7 +202,7 @@ public class FixedFileTrailer {
.setNumDataIndexLevels(numDataIndexLevels)
.setFirstDataBlockOffset(firstDataBlockOffset)
.setLastDataBlockOffset(lastDataBlockOffset)
// TODO this is a classname encoded into an HFile's trailer. We are going to need to have
// TODO this is a classname encoded into an HFile's trailer. We are going to need to have
// some compat code here.
.setComparatorClassName(comparatorClassName)
.setCompressionCodec(compressionCodec.ordinal());
@ -539,25 +541,17 @@ public class FixedFileTrailer {
public void setComparatorClass(Class<? extends KVComparator> klass) {
// Is the comparator instantiable?
try {
KVComparator comp = klass.newInstance();
// HFile V2 legacy comparator class names.
if (KeyValue.COMPARATOR.getClass().equals(klass)) {
comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName();
} else if (KeyValue.META_COMPARATOR.getClass().equals(klass)) {
comparatorClassName = KeyValue.META_COMPARATOR.getLegacyKeyComparatorName();
} else if (KeyValue.RAW_COMPARATOR.getClass().equals(klass)) {
comparatorClassName = KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName();
} else {
// if the name wasn't one of the legacy names, maybe its a legit new kind of comparator.
// If null, it should be the Bytes.BYTES_RAWCOMPARATOR
if (klass != null) {
KVComparator comp = klass.newInstance();
// if the name wasn't one of the legacy names, maybe its a legit new
// kind of comparator.
comparatorClassName = klass.getName();
}
} catch (Exception e) {
throw new RuntimeException("Comparator class " + klass.getName() +
" is not instantiable", e);
throw new RuntimeException("Comparator class " + klass.getName() + " is not instantiable", e);
}
}
@SuppressWarnings("unchecked")
@ -570,13 +564,16 @@ public class FixedFileTrailer {
} else if (comparatorClassName.equals(KeyValue.META_COMPARATOR.getLegacyKeyComparatorName())) {
comparatorClassName = KeyValue.META_COMPARATOR.getClass().getName();
} else if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName())) {
comparatorClassName = KeyValue.RAW_COMPARATOR.getClass().getName();
return null;
}
// if the name wasn't one of the legacy names, maybe its a legit new kind of comparator.
return (Class<? extends KVComparator>)
Class.forName(comparatorClassName);
if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getClass().getName())) {
// Return null for Bytes.BYTES_RAWCOMPARATOR
return null;
} else {
return (Class<? extends KVComparator>) Class.forName(comparatorClassName);
}
} catch (ClassNotFoundException ex) {
throw new IOException(ex);
}
@ -585,7 +582,8 @@ public class FixedFileTrailer {
public static KVComparator createComparator(
String comparatorClassName) throws IOException {
try {
return getComparatorClass(comparatorClassName).newInstance();
Class<? extends KVComparator> comparatorClass = getComparatorClass(comparatorClassName);
return comparatorClass != null ? comparatorClass.newInstance() : null;
} catch (InstantiationException e) {
throw new IOException("Comparator class " + comparatorClassName +
" is not instantiable", e);

View File

@ -884,15 +884,22 @@ public class StoreFile {
" (ROW or ROWCOL expected)");
}
generalBloomFilterWriter.add(bloomKey, bloomKeyOffset, bloomKeyLen);
if (lastBloomKey != null
&& generalBloomFilterWriter.getComparator().compareFlatKey(bloomKey,
bloomKeyOffset, bloomKeyLen, lastBloomKey,
lastBloomKeyOffset, lastBloomKeyLen) <= 0) {
throw new IOException("Non-increasing Bloom keys: "
+ Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen)
+ " after "
+ Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset,
lastBloomKeyLen));
if (lastBloomKey != null) {
int res = 0;
// hbase:meta does not have blooms. So we need not have special interpretation
// of the hbase:meta cells. We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
if (bloomType == BloomType.ROW) {
res = Bytes.BYTES_RAWCOMPARATOR.compare(bloomKey, bloomKeyOffset, bloomKeyLen,
lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen);
} else {
res = KeyValue.COMPARATOR.compareFlatKey(bloomKey,
bloomKeyOffset, bloomKeyLen, lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen);
}
if (res <= 0) {
throw new IOException("Non-increasing Bloom keys: "
+ Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen) + " after "
+ Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen));
}
}
lastBloomKey = bloomKey;
lastBloomKeyOffset = bloomKeyOffset;
@ -913,6 +920,8 @@ public class StoreFile {
if (null != this.deleteFamilyBloomFilterWriter) {
boolean newKey = true;
if (lastDeleteFamilyCell != null) {
// hbase:meta does not have blooms. So we need not have special interpretation
// of the hbase:meta cells
newKey = !kvComparator.matchingRows(cell, lastDeleteFamilyCell);
}
if (newKey) {
@ -1280,8 +1289,16 @@ public class StoreFile {
// Whether the primary Bloom key is greater than the last Bloom key
// from the file info. For row-column Bloom filters this is not yet
// a sufficient condition to return false.
boolean keyIsAfterLast = lastBloomKey != null
&& bloomFilter.getComparator().compareFlatKey(key, lastBloomKey) > 0;
boolean keyIsAfterLast = (lastBloomKey != null);
// hbase:meta does not have blooms. So we need not have special interpretation
// of the hbase:meta cells. We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
if (keyIsAfterLast) {
if (bloomFilterType == BloomType.ROW) {
keyIsAfterLast = (Bytes.BYTES_RAWCOMPARATOR.compare(key, lastBloomKey) > 0);
} else {
keyIsAfterLast = (KeyValue.COMPARATOR.compareFlatKey(key, lastBloomKey)) > 0;
}
}
if (bloomFilterType == BloomType.ROWCOL) {
// Since a Row Delete is essentially a DeleteFamily applied to all
@ -1292,7 +1309,7 @@ public class StoreFile {
null, 0, 0);
if (keyIsAfterLast
&& bloomFilter.getComparator().compareFlatKey(rowBloomKey,
&& KeyValue.COMPARATOR.compareFlatKey(rowBloomKey,
lastBloomKey) > 0) {
exists = false;
} else {

View File

@ -49,9 +49,4 @@ public interface BloomFilterBase {
byte[] createBloomKey(byte[] rowBuf, int rowOffset, int rowLen,
byte[] qualBuf, int qualOffset, int qualLen);
/**
* @return Bloom key comparator
*/
KVComparator getComparator();
}

View File

@ -199,7 +199,7 @@ public final class BloomFilterFactory {
// In case of compound Bloom filters we ignore the maxKeys hint.
CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : KeyValue.RAW_COMPARATOR);
bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : null);
writer.addInlineBlockWriter(bloomWriter);
return bloomWriter;
}
@ -230,7 +230,7 @@ public final class BloomFilterFactory {
// In case of compound Bloom filters we ignore the maxKeys hint.
CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
KeyValue.RAW_COMPARATOR);
null);
writer.addInlineBlockWriter(bloomWriter);
return bloomWriter;
}

View File

@ -27,8 +27,6 @@ import java.text.NumberFormat;
import java.util.Random;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.io.Writable;
/**
@ -625,12 +623,6 @@ public class ByteBloomFilter implements BloomFilter, BloomFilterWriter {
return result;
}
@Override
public KVComparator getComparator() {
// return Bytes.BYTES_RAWCOMPARATOR;
return KeyValue.RAW_COMPARATOR;
}
/**
* A human-readable string with statistics for the given Bloom filter.
*

View File

@ -24,7 +24,6 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
@ -70,8 +69,13 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase
totalKeyCount = meta.readLong();
totalMaxKeys = meta.readLong();
numChunks = meta.readInt();
comparator = FixedFileTrailer.createComparator(
Bytes.toString(Bytes.readByteArray(meta)));
byte[] comparatorClassName = Bytes.readByteArray(meta);
// The writer would have return 0 as the vint length for the case of
// Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be
// null
if (comparatorClassName.length != 0) {
comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
}
hash = Hash.getInstance(hashType);
if (hash == null) {
@ -131,11 +135,6 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase
return numChunks;
}
@Override
public KVComparator getComparator() {
return comparator;
}
public void enableTestingStats() {
numQueriesPerChunk = new long[numChunks];
numPositivesPerChunk = new long[numChunks];
@ -172,7 +171,9 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase
sb.append(ByteBloomFilter.STATS_RECORD_SEP +
"Number of chunks: " + numChunks);
sb.append(ByteBloomFilter.STATS_RECORD_SEP +
"Comparator: " + comparator.getClass().getSimpleName());
((comparator != null) ? "Comparator: "
+ comparator.getClass().getSimpleName() : "Comparator: "
+ Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
return sb.toString();
}

View File

@ -50,7 +50,6 @@ public class CompoundBloomFilterBase implements BloomFilterBase {
/** Hash function type to use, as defined in {@link Hash} */
protected int hashType;
/** Comparator used to compare Bloom filter keys */
protected KVComparator comparator;
@ -89,9 +88,4 @@ public class CompoundBloomFilterBase implements BloomFilterBase {
return kv.getKey();
}
@Override
public KVComparator getComparator() {
return comparator;
}
}

View File

@ -243,8 +243,12 @@ public class CompoundBloomFilterWriter extends CompoundBloomFilterBase
// Fields that don't have equivalents in ByteBloomFilter.
out.writeInt(numChunks);
Bytes.writeByteArray(out,
Bytes.toBytes(comparator.getClass().getName()));
if (comparator != null) {
Bytes.writeByteArray(out, Bytes.toBytes(comparator.getClass().getName()));
} else {
// Internally writes a 0 vint if the byte[] is null
Bytes.writeByteArray(out, null);
}
// Write a single-level index without compression or block header.
bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");