diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index 222e1468c8b..46e3261db65 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -76,6 +76,16 @@ public class HFileBlockIndex {
    */
   public static final String MAX_CHUNK_SIZE_KEY = "hfile.index.block.max.size";
 
+  /**
+   * Minimum number of entries in a single index block. Even if we are above the
+   * hfile.index.block.max.size we will keep writing to the same block unless we have that many
+   * entries. We should have at least a few entries so that we don't have too many levels in the
+   * multi-level index. This should be at least 2 to make sure there is no infinite recursion.
+   */
+  public static final String MIN_INDEX_NUM_ENTRIES_KEY = "hfile.index.block.min.entries";
+
+  static final int DEFAULT_MIN_INDEX_NUM_ENTRIES = 16;
+
   /**
    * The number of bytes stored in each "secondary index" entry in addition to
    * key bytes in the non-root index block format. The first long is the file
@@ -120,6 +130,7 @@ public class HFileBlockIndex {
       searchTreeLevel = treeLevel;
     }
 
+    @Override
     protected long calculateHeapSizeForBlockKeys(long heapSize) {
       // Calculating the size of blockKeys
       if (blockKeys != null) {
@@ -162,10 +173,12 @@ public class HFileBlockIndex {
       return null;
     }
 
+    @Override
     protected void initialize(int numEntries) {
       blockKeys = new byte[numEntries][];
     }
 
+    @Override
     protected void add(final byte[] key, final long offset, final int dataSize) {
       blockOffsets[rootCount] = offset;
       blockKeys[rootCount] = key;
@@ -242,6 +255,7 @@ public class HFileBlockIndex {
       searchTreeLevel = treeLevel;
     }
 
+    @Override
     protected long calculateHeapSizeForBlockKeys(long heapSize) {
       if (blockKeys != null) {
         heapSize += ClassSize.REFERENCE;
@@ -430,6 +444,7 @@ public class HFileBlockIndex {
       return targetMidKey;
     }
 
+    @Override
     protected void initialize(int numEntries) {
       blockKeys = new Cell[numEntries];
     }
@@ -441,6 +456,7 @@ public class HFileBlockIndex {
      * @param offset file offset where the block is stored
      * @param dataSize the uncompressed data size
      */
+    @Override
     protected void add(final byte[] key, final long offset, final int dataSize) {
       blockOffsets[rootCount] = offset;
       // Create the blockKeys as Cells once when the reader is opened
@@ -569,7 +585,7 @@ public class HFileBlockIndex {
      * Return the BlockWithScanInfo which contains the DataBlock with other scan
      * info such as nextIndexedKey. This function will only be called when the
      * HFile version is larger than 1.
-     * 
+     *
      * @param key
      *          the key we are looking for
      * @param currentBlock
@@ -623,7 +639,7 @@ public class HFileBlockIndex {
 
     /**
      * Finds the root-level index block containing the given key.
-     * 
+     *
      * @param key
      *          Key to find
      * @param comp
@@ -701,7 +717,7 @@ public class HFileBlockIndex {
      * Performs a binary search over a non-root level index block. Utilizes the
      * secondary index, which records the offsets of (offset, onDiskSize,
      * firstKey) tuples of all entries.
-     * 
+     *
      * @param key
      *          the key we are searching for offsets to individual entries in
      *          the blockIndex buffer
@@ -985,6 +1001,9 @@ public class HFileBlockIndex {
     /** The maximum size guideline of all multi-level index blocks. */
     private int maxChunkSize;
 
+    /** The minimum number of entries in a single index block */
+    private int minIndexNumEntries;
+
     /** Whether we require this block index to always be single-level. */
     private boolean singleLevelOnly;
 
@@ -1017,15 +1036,23 @@ public class HFileBlockIndex {
       this.cacheConf = cacheConf;
       this.nameForCaching = nameForCaching;
       this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;
+      this.minIndexNumEntries = HFileBlockIndex.DEFAULT_MIN_INDEX_NUM_ENTRIES;
     }
 
     public void setMaxChunkSize(int maxChunkSize) {
       if (maxChunkSize <= 0) {
-        throw new IllegalArgumentException("Invald maximum index block size");
+        throw new IllegalArgumentException("Invalid maximum index block size");
       }
       this.maxChunkSize = maxChunkSize;
     }
 
+    public void setMinIndexNumEntries(int minIndexNumEntries) {
+      if (minIndexNumEntries <= 1) {
+        throw new IllegalArgumentException("Invalid minimum index number of entries, should be >= 2");
+      }
+      this.minIndexNumEntries = minIndexNumEntries;
+    }
+
     /**
      * Writes the root level and intermediate levels of the block index into
      * the output stream, generating the tree from bottom up. Assumes that the
@@ -1057,7 +1084,11 @@ public class HFileBlockIndex {
           : null;
 
       if (curInlineChunk != null) {
-        while (rootChunk.getRootSize() > maxChunkSize) {
+        while (rootChunk.getRootSize() > maxChunkSize
+            // HBASE-16288: if firstKey is larger than maxChunkSize we will loop indefinitely
+            && rootChunk.getNumEntries() > minIndexNumEntries
+            // Sanity check: we will not hit this, since (minIndexNumEntries ^ 16) blocks can be addressed
+            && numLevels < 16) {
           rootChunk = writeIntermediateLevel(out, rootChunk);
           numLevels += 1;
         }
@@ -1153,8 +1184,12 @@ public class HFileBlockIndex {
         curChunk.add(currentLevel.getBlockKey(i),
             currentLevel.getBlockOffset(i), currentLevel.getOnDiskDataSize(i));
 
-        if (curChunk.getRootSize() >= maxChunkSize)
+        // HBASE-16288: We have to have at least minIndexNumEntries (16 by default) items in the
+        // index so that we won't end up with too many levels for an index with very large rowKeys.
+        // Also, if the first key is larger than maxChunkSize this will cause infinite recursion.
+        if (i >= minIndexNumEntries && curChunk.getRootSize() >= maxChunkSize) {
           writeIntermediateBlock(out, parent, curChunk);
+        }
       }
 
       if (curChunk.getNumEntries() > 0) {
@@ -1647,4 +1682,8 @@ public class HFileBlockIndex {
   public static int getMaxChunkSize(Configuration conf) {
     return conf.getInt(MAX_CHUNK_SIZE_KEY, DEFAULT_MAX_CHUNK_SIZE);
   }
+
+  public static int getMinIndexNumEntries(Configuration conf) {
+    return conf.getInt(MIN_INDEX_NUM_ENTRIES_KEY, DEFAULT_MIN_INDEX_NUM_ENTRIES);
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index 576974493c2..c57ecf746ce 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -287,6 +287,8 @@ public class HFileWriterImpl implements HFile.Writer {
         cacheIndexesOnWrite ? name : null);
     dataBlockIndexWriter.setMaxChunkSize(
         HFileBlockIndex.getMaxChunkSize(conf));
+    dataBlockIndexWriter.setMinIndexNumEntries(
+        HFileBlockIndex.getMinIndexNumEntries(conf));
     inlineBlockWriters.add(dataBlockIndexWriter);
 
     // Meta data block index writer
@@ -327,13 +329,13 @@ public class HFileWriterImpl implements HFile.Writer {
         doCacheOnWrite(lastDataBlockOffset);
       }
     }
-  
+
   /**
    * Try to return a Cell that falls between left and
    * right but that is shorter; i.e. takes up less space. This
    * trick is used when building the HFile block index. It's an optimization. It does
    * not always work. In this case we'll just return the right cell.
-   * 
+   *
    * @param comparator
    *          Comparator to use.
    * @param left
@@ -764,4 +766,4 @@ public class HFileWriterImpl implements HFile.Writer {
       outputStream = null;
     }
   }
-}
\ No newline at end of file
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
index 470d48358e1..810dd9fa5ff 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
@@ -348,7 +348,7 @@ public class TestHFileBlockIndex {
 
     for (int i = 0; i < numTotalKeys; ++i) {
       byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i * 2);
-      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"), 
+      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"),
           Bytes.toBytes("val"));
       //KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length);
       keys.add(cell.getKey());
@@ -671,6 +671,48 @@ public class TestHFileBlockIndex {
         valueRead);
   }
 
+  @Test(timeout=10000)
+  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
+    testIntermediateLevelIndicesWithLargeKeys(16);
+  }
+  @Test(timeout=10000)
+  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
+    // because of the large rowKeys, we would end up with a 50-level block index without the sanity check
+    testIntermediateLevelIndicesWithLargeKeys(2);
+  }
+
+  public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
+    Path hfPath = new Path(TEST_UTIL.getDataTestDir(),
+        "testIntermediateLevelIndicesWithLargeKeys.hfile");
+    int maxChunkSize = 1024;
+    FileSystem fs = FileSystem.get(conf);
+    CacheConfig cacheConf = new CacheConfig(conf);
+    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
+    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
+    HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
+    HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf)
+        .withFileContext(context)
+        .withPath(fs, hfPath).create();
+    List<byte[]> keys = new ArrayList<>();
+
+    // This should result in leaf-level indices and a root level index
+    for (int i = 0; i < 100; i++) {
+      byte[] rowkey = new byte[maxChunkSize + 1];
+      byte[] b = Bytes.toBytes(i);
+      System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
+      keys.add(rowkey);
+      hfw.append(CellUtil.createCell(rowkey));
+    }
+    hfw.close();
+
+    HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, conf);
+    // Scanner doesn't do Cells yet. Fix.
+    HFileScanner scanner = reader.getScanner(true, true);
+    for (int i = 0; i < keys.size(); ++i) {
+      scanner.seekTo(CellUtil.createCell(keys.get(i)));
+    }
+    reader.close();
+  }
 }
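Taken together, the patch wires the new hfile.index.block.min.entries setting through the same path as the existing hfile.index.block.max.size. Below is a minimal sketch of how a client could tune the two knobs together; the class name IndexTuningExample and the chosen values are hypothetical, while the configuration keys and static getters are the ones added or used by this patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;

public class IndexTuningExample {  // hypothetical driver class
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Pre-existing knob: soft cap on the root size of a single index block.
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, 128 * 1024);
    // New knob (HBASE-16288): keep at least this many entries in an index
    // block even when the size cap is exceeded; must be >= 2 so that every
    // intermediate level shrinks the index and index writing terminates.
    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, 16);
    // HFileWriterImpl reads both values back through these static getters.
    System.out.println("max chunk size = " + HFileBlockIndex.getMaxChunkSize(conf)
        + ", min entries = " + HFileBlockIndex.getMinIndexNumEntries(conf));
  }
}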
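As a back-of-the-envelope check of the sanity-check comment in writeIndexBlocks: with at least minIndexNumEntries children per index block, each intermediate level multiplies the number of addressable leaf blocks by that factor, so the level count grows only logarithmically even for oversized keys. The helper below is hypothetical, for illustration only, and is not part of the patch:

class IndexLevelMath {  // hypothetical, for illustration only
  // Smallest number of index levels that can address numLeafBlocks leaf
  // blocks when every index block holds at least minEntries entries.
  static int levelsNeeded(long numLeafBlocks, int minEntries) {
    int levels = 1;                 // the root level always exists
    long addressable = minEntries;  // blocks reachable through the root alone
    while (addressable < numLeafBlocks) {
      addressable *= minEntries;    // one more intermediate level
      levels++;
    }
    return levels;
  }
  // levelsNeeded(1L << 40, 16) == 10: about a trillion leaf blocks stay well
  // under the numLevels < 16 bound, since 16 levels address 16^16 blocks.
}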