From a4fa930843896f6c134fe9372e75e79b03611d73 Mon Sep 17 00:00:00 2001
From: ramkrishna
Date: Fri, 24 Apr 2015 16:01:38 +0530
Subject: [PATCH] HBASE-13450 - Purge RawBytescomparator from the writers and readers for HBASE-10800 (Ram)

---
 .../org/apache/hadoop/hbase/util/Bytes.java   | 61 +++++++++++++++++++
 .../hbase/io/hfile/FixedFileTrailer.java      | 40 ++++++------
 .../hadoop/hbase/regionserver/StoreFile.java  | 41 +++++++++----
 .../hadoop/hbase/util/BloomFilterBase.java    |  5 --
 .../hadoop/hbase/util/BloomFilterFactory.java |  4 +-
 .../hadoop/hbase/util/ByteBloomFilter.java    |  8 ---
 .../hbase/util/CompoundBloomFilter.java       | 19 +++---
 .../hbase/util/CompoundBloomFilterBase.java   |  6 --
 .../hbase/util/CompoundBloomFilterWriter.java |  8 ++-
 9 files changed, 127 insertions(+), 65 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index f294ec92471..84c708b8050 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -2055,6 +2055,23 @@ public class Bytes implements Comparable<Bytes> {
     return result;
   }
 
+  /**
+   * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR
+   *
+   * @param arr array of byte arrays to search for
+   * @param key the key you want to find
+   * @param offset the offset in the key you want to find
+   * @param length the length of the key
+   * @return zero-based index of the key, if the key is present in the array.
+   *         Otherwise, a value -(i + 1) such that the key is between arr[i -
+   *         1] and arr[i] non-inclusively, where i is in [0, i], if we define
+   *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
+   *         means that this function can return 2N + 1 different values
+   *         ranging from -(N + 1) to N - 1.
+   */
+  public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
+    return binarySearch(arr, key, offset, length, Bytes.BYTES_RAWCOMPARATOR);
+  }
   /**
    * Binary search for keys in indexes.
    *
@@ -2069,9 +2086,14 @@ public class Bytes implements Comparable<Bytes> {
    *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
    *         means that this function can return 2N + 1 different values
    *         ranging from -(N + 1) to N - 1.
+   * @deprecated {@link Bytes#binarySearch(byte[][], byte[], int, int)}
    */
+  @Deprecated
   public static int binarySearch(byte [][]arr, byte []key, int offset,
       int length, RawComparator<?> comparator) {
+    if(comparator == null) {
+      comparator = Bytes.BYTES_RAWCOMPARATOR;
+    }
     int low = 0;
     int high = arr.length - 1;
 
@@ -2107,7 +2129,9 @@ public class Bytes implements Comparable<Bytes> {
    *         means that this function can return 2N + 1 different values
    *         ranging from -(N + 1) to N - 1.
    * @return the index of the block
+   * @deprecated Use {@link Bytes#binarySearch(byte[][], Cell, Comparator)}
    */
+  @Deprecated
   public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) {
     int low = 0;
     int high = arr.length - 1;
@@ -2131,6 +2155,43 @@ public class Bytes implements Comparable<Bytes> {
     return - (low+1);
   }
 
+  /**
+   * Binary search for keys in indexes.
+   *
+   * @param arr array of byte arrays to search for
+   * @param key the key you want to find
+   * @param comparator a comparator to compare.
+   * @return zero-based index of the key, if the key is present in the array.
+   *         Otherwise, a value -(i + 1) such that the key is between arr[i -
+   *         1] and arr[i] non-inclusively, where i is in [0, i], if we define
+   *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
+   *         means that this function can return 2N + 1 different values
+   *         ranging from -(N + 1) to N - 1.
+   * @return the index of the block
+   */
+  public static int binarySearch(byte[][] arr, Cell key, Comparator<Cell> comparator) {
+    int low = 0;
+    int high = arr.length - 1;
+    KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
+    while (low <= high) {
+      int mid = (low+high) >>> 1;
+      // we have to compare in this order, because the comparator order
+      // has special logic when the 'left side' is a special key.
+      r.setKey(arr[mid], 0, arr[mid].length);
+      int cmp = comparator.compare(key, r);
+      // key lives above the midpoint
+      if (cmp > 0)
+        low = mid + 1;
+      // key lives below the midpoint
+      else if (cmp < 0)
+        high = mid - 1;
+      // BAM. how often does this really happen?
+      else
+        return mid;
+    }
+    return - (low+1);
+  }
+
   /**
    * Bytewise binary increment/deincrement of long contained in byte array
    * on given amount.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
index 3dcfc9b67a8..9ce505617db 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
 import org.apache.hadoop.hbase.util.Bytes;
 
+
 /**
  * The {@link HFile} has a fixed trailer which contains offsets to other
  * variable parts of the file. Also includes basic metadata on this file. The
@@ -107,7 +108,8 @@
   private long lastDataBlockOffset;
 
   /** Raw key comparator class name in version 3 */
-  private String comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName();
+  // We could write the actual class name from 2.0 onwards and handle BC
+  private String comparatorClassName = KeyValue.COMPARATOR.getClass().getName();
 
   /** The encryption key */
   private byte[] encryptionKey;
@@ -200,7 +202,7 @@
       .setNumDataIndexLevels(numDataIndexLevels)
       .setFirstDataBlockOffset(firstDataBlockOffset)
       .setLastDataBlockOffset(lastDataBlockOffset)
-      // TODO this is a classname encoded into an HFile's trailer. We are going to need to have 
+      // TODO this is a classname encoded into an HFile's trailer. We are going to need to have
       // some compat code here.
       .setComparatorClassName(comparatorClassName)
       .setCompressionCodec(compressionCodec.ordinal());
@@ -539,25 +541,17 @@
   public void setComparatorClass(Class<? extends KVComparator> klass) {
     // Is the comparator instantiable?
     try {
-      KVComparator comp = klass.newInstance();
-
-      // HFile V2 legacy comparator class names.
-      if (KeyValue.COMPARATOR.getClass().equals(klass)) {
-        comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName();
-      } else if (KeyValue.META_COMPARATOR.getClass().equals(klass)) {
-        comparatorClassName = KeyValue.META_COMPARATOR.getLegacyKeyComparatorName();
-      } else if (KeyValue.RAW_COMPARATOR.getClass().equals(klass)) {
-        comparatorClassName = KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName();
-      } else {
-        // if the name wasn't one of the legacy names, maybe its a legit new kind of comparator.
+      // If null, it should be the Bytes.BYTES_RAWCOMPARATOR
+      if (klass != null) {
+        KVComparator comp = klass.newInstance();
+        // if the name wasn't one of the legacy names, maybe its a legit new
+        // kind of comparator.
         comparatorClassName = klass.getName();
       }
 
     } catch (Exception e) {
-      throw new RuntimeException("Comparator class " + klass.getName() +
-          " is not instantiable", e);
+      throw new RuntimeException("Comparator class " + klass.getName() + " is not instantiable", e);
     }
-
   }
 
   @SuppressWarnings("unchecked")
@@ -570,13 +564,16 @@
       } else if (comparatorClassName.equals(KeyValue.META_COMPARATOR.getLegacyKeyComparatorName())) {
         comparatorClassName = KeyValue.META_COMPARATOR.getClass().getName();
       } else if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName())) {
-        comparatorClassName = KeyValue.RAW_COMPARATOR.getClass().getName();
+        return null;
       }
 
       // if the name wasn't one of the legacy names, maybe its a legit new kind of comparator.
-
-      return (Class<? extends KVComparator>)
-          Class.forName(comparatorClassName);
+      if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getClass().getName())) {
+        // Return null for Bytes.BYTES_RAWCOMPARATOR
+        return null;
+      } else {
+        return (Class<? extends KVComparator>) Class.forName(comparatorClassName);
+      }
     } catch (ClassNotFoundException ex) {
       throw new IOException(ex);
     }
@@ -585,7 +582,8 @@
   public static KVComparator createComparator(
       String comparatorClassName) throws IOException {
     try {
-      return getComparatorClass(comparatorClassName).newInstance();
+      Class<? extends KVComparator> comparatorClass = getComparatorClass(comparatorClassName);
+      return comparatorClass != null ? comparatorClass.newInstance() : null;
     } catch (InstantiationException e) {
       throw new IOException("Comparator class " + comparatorClassName +
         " is not instantiable", e);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
index 345dd9b36ce..8cb20cd4fb5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
@@ -884,15 +884,22 @@
               " (ROW or ROWCOL expected)");
         }
         generalBloomFilterWriter.add(bloomKey, bloomKeyOffset, bloomKeyLen);
-        if (lastBloomKey != null
-            && generalBloomFilterWriter.getComparator().compareFlatKey(bloomKey,
-                bloomKeyOffset, bloomKeyLen, lastBloomKey,
-                lastBloomKeyOffset, lastBloomKeyLen) <= 0) {
-          throw new IOException("Non-increasing Bloom keys: "
-              + Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen)
-              + " after "
-              + Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset,
-                  lastBloomKeyLen));
+        if (lastBloomKey != null) {
+          int res = 0;
+          // hbase:meta does not have blooms. So we need not have special interpretation
+          // of the hbase:meta cells. We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
+          if (bloomType == BloomType.ROW) {
+            res = Bytes.BYTES_RAWCOMPARATOR.compare(bloomKey, bloomKeyOffset, bloomKeyLen,
+                lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen);
+          } else {
+            res = KeyValue.COMPARATOR.compareFlatKey(bloomKey,
+                bloomKeyOffset, bloomKeyLen, lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen);
+          }
+          if (res <= 0) {
+            throw new IOException("Non-increasing Bloom keys: "
+                + Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen) + " after "
+                + Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen));
+          }
         }
         lastBloomKey = bloomKey;
         lastBloomKeyOffset = bloomKeyOffset;
@@ -913,6 +920,8 @@
       if (null != this.deleteFamilyBloomFilterWriter) {
         boolean newKey = true;
         if (lastDeleteFamilyCell != null) {
+          // hbase:meta does not have blooms. So we need not have special interpretation
+          // of the hbase:meta cells
           newKey = !kvComparator.matchingRows(cell, lastDeleteFamilyCell);
         }
         if (newKey) {
@@ -1280,8 +1289,16 @@
       // Whether the primary Bloom key is greater than the last Bloom key
       // from the file info. For row-column Bloom filters this is not yet
       // a sufficient condition to return false.
-      boolean keyIsAfterLast = lastBloomKey != null
-          && bloomFilter.getComparator().compareFlatKey(key, lastBloomKey) > 0;
+      boolean keyIsAfterLast = (lastBloomKey != null);
+      // hbase:meta does not have blooms. So we need not have special interpretation
+      // of the hbase:meta cells. We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
+      if (keyIsAfterLast) {
+        if (bloomFilterType == BloomType.ROW) {
+          keyIsAfterLast = (Bytes.BYTES_RAWCOMPARATOR.compare(key, lastBloomKey) > 0);
+        } else {
+          keyIsAfterLast = (KeyValue.COMPARATOR.compareFlatKey(key, lastBloomKey)) > 0;
+        }
+      }
 
       if (bloomFilterType == BloomType.ROWCOL) {
         // Since a Row Delete is essentially a DeleteFamily applied to all
@@ -1292,7 +1309,7 @@
               null, 0, 0);
 
         if (keyIsAfterLast
-            && bloomFilter.getComparator().compareFlatKey(rowBloomKey,
+            && KeyValue.COMPARATOR.compareFlatKey(rowBloomKey,
                 lastBloomKey) > 0) {
           exists = false;
         } else {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java
index 3b9ca9a6ed6..8198726b10e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java
@@ -49,9 +49,4 @@ public interface BloomFilterBase {
   byte[] createBloomKey(byte[] rowBuf, int rowOffset, int rowLen,
       byte[] qualBuf, int qualOffset, int qualLen);
 
-  /**
-   * @return Bloom key comparator
-   */
-  KVComparator getComparator();
-
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
index 5ff7207a334..71908ab29b7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
@@ -199,7 +199,7 @@
     // In case of compound Bloom filters we ignore the maxKeys hint.
     CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
         err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
-        bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : KeyValue.RAW_COMPARATOR);
+        bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : null);
     writer.addInlineBlockWriter(bloomWriter);
     return bloomWriter;
   }
@@ -230,7 +230,7 @@
     // In case of compound Bloom filters we ignore the maxKeys hint.
     CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
         err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
-        KeyValue.RAW_COMPARATOR);
+        null);
     writer.addInlineBlockWriter(bloomWriter);
     return bloomWriter;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
index 56c37762f66..8f1f652779f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
@@ -27,8 +27,6 @@ import java.text.NumberFormat;
 import java.util.Random;
 
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValue.KVComparator;
 import org.apache.hadoop.io.Writable;
 
 /**
@@ -625,12 +623,6 @@
     return result;
   }
 
-  @Override
-  public KVComparator getComparator() {
-// return Bytes.BYTES_RAWCOMPARATOR;
-    return KeyValue.RAW_COMPARATOR;
-  }
-
   /**
    * A human-readable string with statistics for the given Bloom filter.
    *
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
index beda805ed1a..ce9d2d21429 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
@@ -24,7 +24,6 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.KeyValue.KVComparator;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
 import org.apache.hadoop.hbase.io.hfile.HFile;
@@ -70,8 +69,13 @@
     totalKeyCount = meta.readLong();
     totalMaxKeys = meta.readLong();
     numChunks = meta.readInt();
-    comparator = FixedFileTrailer.createComparator(
-        Bytes.toString(Bytes.readByteArray(meta)));
+    byte[] comparatorClassName = Bytes.readByteArray(meta);
+    // The writer would have return 0 as the vint length for the case of
+    // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be
+    // null
+    if (comparatorClassName.length != 0) {
+      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
+    }
 
     hash = Hash.getInstance(hashType);
     if (hash == null) {
@@ -131,11 +135,6 @@
     return numChunks;
   }
 
-  @Override
-  public KVComparator getComparator() {
-    return comparator;
-  }
-
   public void enableTestingStats() {
     numQueriesPerChunk = new long[numChunks];
     numPositivesPerChunk = new long[numChunks];
@@ -172,7 +171,9 @@
     sb.append(ByteBloomFilter.STATS_RECORD_SEP +
         "Number of chunks: " + numChunks);
     sb.append(ByteBloomFilter.STATS_RECORD_SEP +
-        "Comparator: " + comparator.getClass().getSimpleName());
+        ((comparator != null) ? "Comparator: " +
+        comparator.getClass().getSimpleName() : "Comparator: " +
+        Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
 
     return sb.toString();
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
index af9fa00b770..0ba46e6565d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
@@ -50,7 +50,6 @@ public class CompoundBloomFilterBase implements BloomFilterBase {
 
   /** Hash function type to use, as defined in {@link Hash} */
   protected int hashType;
-
   /** Comparator used to compare Bloom filter keys */
   protected KVComparator comparator;
 
@@ -89,9 +88,4 @@
     return kv.getKey();
   }
 
-  @Override
-  public KVComparator getComparator() {
-    return comparator;
-  }
-
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java
index d436a98b5b0..1cef5238646 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java
@@ -243,8 +243,12 @@
 
       // Fields that don't have equivalents in ByteBloomFilter.
       out.writeInt(numChunks);
-      Bytes.writeByteArray(out,
-          Bytes.toBytes(comparator.getClass().getName()));
+      if (comparator != null) {
+        Bytes.writeByteArray(out, Bytes.toBytes(comparator.getClass().getName()));
+      } else {
+        // Internally writes a 0 vint if the byte[] is null
+        Bytes.writeByteArray(out, null);
+      }
 
       // Write a single-level index without compression or block header.
       bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");