From 4370fd18e461631894f405b3f3b1ac4ac1e57a1d Mon Sep 17 00:00:00 2001 From: zjushch Date: Sat, 20 Apr 2013 08:38:17 +0000 Subject: [PATCH] HBASE-8317 Seek returns wrong result with PREFIX_TREE Encoding git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1470123 13f79535-47bb-0310-9956-ffa450edef68 --- .../decode/PrefixTreeArraySearcher.java | 6 +- .../row/TestPrefixTreeSearcher.java | 73 +++--- .../io/encoding/TestPrefixTreeEncoding.java | 219 ++++++++++++++++++ 3 files changed, 263 insertions(+), 35 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java index 097a99757b8..1bb35eb0070 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java @@ -126,7 +126,11 @@ public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner im //detect dead end (no fan to descend into) if(!currentRowNode.hasFan()){ if(hasOccurrences()){ - populateFirstNonRowFields(); + if (rowLength < key.getRowLength()) { + nextRow(); + } else { + populateFirstNonRowFields(); + } return CellScannerPosition.AFTER; }else{ //TODO i don't think this case is exercised by any tests diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java index 02d599b5976..acf9aabb8f6 100644 --- a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java @@ -126,50 +126,55 @@ public class TestPrefixTreeSearcher { } } - /** - * very hard to test nubs with this thing since the a nextRowKey function will usually skip them - */ @Test public void testRandomSeekMisses() throws IOException { CellSearcher searcher = null; List rowStartIndexes = rows.getRowStartIndexes(); try { searcher = DecoderFactory.checkOut(block, true); - for (int i=0; i < rows.getInputs().size(); ++i) { - KeyValue kv = rows.getInputs().get(i); - //nextRow - KeyValue inputNextRow = KeyValueUtil.createFirstKeyInNextRow(kv); + //test both the positionAtOrBefore and positionAtOrAfter methods + for(boolean beforeVsAfterOnMiss : new boolean[]{true, false}){ + for (int i=0; i < rows.getInputs().size(); ++i) { + KeyValue kv = rows.getInputs().get(i); - CellScannerPosition position = searcher.positionAtOrBefore(inputNextRow); - boolean isFirstInRow = rowStartIndexes.contains(i); - if(isFirstInRow){ - int rowIndex = rowStartIndexes.indexOf(i); - if(rowIndex < rowStartIndexes.size() - 1){ -// int lastKvInRowI = rowStartIndexes.get(rowIndex + 1) - 1; - Assert.assertEquals(CellScannerPosition.BEFORE, position); - /* - * Can't get this to work between nubs like rowB\x00 <-> rowBB - * - * No reason to doubt that it works, but will have to come up with a smarter test. - */ -// Assert.assertEquals(rows.getInputs().get(lastKvInRowI), searcher.getCurrentCell()); + //nextRow + KeyValue inputNextRow = KeyValueUtil.createFirstKeyInNextRow(kv); + + CellScannerPosition position = beforeVsAfterOnMiss + ? searcher.positionAtOrBefore(inputNextRow) + : searcher.positionAtOrAfter(inputNextRow); + boolean isFirstInRow = rowStartIndexes.contains(i); + if(isFirstInRow){ + int rowIndex = rowStartIndexes.indexOf(i); + if(rowIndex < rowStartIndexes.size() - 1){ + if(beforeVsAfterOnMiss){ + Assert.assertEquals(CellScannerPosition.BEFORE, position); + }else{ + Assert.assertEquals(CellScannerPosition.AFTER, position); + } + + int expectedInputIndex = beforeVsAfterOnMiss + ? rowStartIndexes.get(rowIndex + 1) - 1 + : rowStartIndexes.get(rowIndex + 1); + Assert.assertEquals(rows.getInputs().get(expectedInputIndex), searcher.current()); + } } - } - //previous KV - KeyValue inputPreviousKv = KeyValueUtil.previousKey(kv); - boolean hit = searcher.positionAt(inputPreviousKv); - Assert.assertFalse(hit); - position = searcher.positionAtOrAfter(inputPreviousKv); - if(CollectionUtils.isLastIndex(rows.getInputs(), i)){ - Assert.assertTrue(CellScannerPosition.AFTER_LAST == position); - }else{ - Assert.assertTrue(CellScannerPosition.AFTER == position); - /* - * TODO: why i+1 instead of i? - */ - Assert.assertEquals(rows.getInputs().get(i+1), searcher.current()); + //previous KV + KeyValue inputPreviousKv = KeyValueUtil.previousKey(kv); + boolean hit = searcher.positionAt(inputPreviousKv); + Assert.assertFalse(hit); + position = searcher.positionAtOrAfter(inputPreviousKv); + if(CollectionUtils.isLastIndex(rows.getInputs(), i)){ + Assert.assertTrue(CellScannerPosition.AFTER_LAST == position); + }else{ + Assert.assertTrue(CellScannerPosition.AFTER == position); + /* + * TODO: why i+1 instead of i? + */ + Assert.assertEquals(rows.getInputs().get(i+1), searcher.current()); + } } } } finally { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java new file mode 100644 index 00000000000..2ea0db5377a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java @@ -0,0 +1,219 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import static org.junit.Assert.fail; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ConcurrentSkipListSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CollectionBackedScanner; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Tests scanning/seeking data with PrefixTree Encoding. + */ +@Category(SmallTests.class) +public class TestPrefixTreeEncoding { + private static final Log LOG = LogFactory + .getLog(TestPrefixTreeEncoding.class); + static final String CF = "EncodingTestCF"; + static final byte[] CF_BYTES = Bytes.toBytes(CF); + private static final int NUM_ROWS_PER_BATCH = 50; + private static final int NUM_COLS_PER_ROW = 20; + + private int numBatchesWritten = 0; + private ConcurrentSkipListSet kvset = new ConcurrentSkipListSet( + KeyValue.COMPARATOR); + + @Before + public void setUp() throws Exception { + kvset.clear(); + } + + @Test + public void testScanWithRandomData() throws Exception { + PrefixTreeCodec encoder = new PrefixTreeCodec(); + ByteBuffer dataBuffer = generateRandomTestData(kvset, numBatchesWritten++); + HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( + Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]); + encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx); + EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false); + byte[] onDiskBytes=blkEncodingCtx.getOnDiskBytesWithHeader(); + ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, + DataBlockEncoding.ID_SIZE, onDiskBytes.length + - DataBlockEncoding.ID_SIZE); + seeker.setCurrentBuffer(readBuffer); + KeyValue previousKV = null; + do{ + KeyValue currentKV = seeker.getKeyValue(); + if (previousKV != null && KeyValue.COMPARATOR.compare(currentKV, previousKV) < 0) { + dumpInputKVSet(); + fail("Current kv " + currentKV + " is smaller than previous keyvalue " + + previousKV); + } + previousKV = currentKV; + } while (seeker.next()); + } + + @Test + public void testSeekWithRandomData() throws Exception { + PrefixTreeCodec encoder = new PrefixTreeCodec(); + int batchId = numBatchesWritten++; + ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId); + HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( + Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]); + encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx); + EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false); + byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); + ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, + DataBlockEncoding.ID_SIZE, onDiskBytes.length + - DataBlockEncoding.ID_SIZE); + verifySeeking(seeker, readBuffer, batchId); + } + + @Test + public void testSeekWithFixedData() throws Exception { + PrefixTreeCodec encoder = new PrefixTreeCodec(); + int batchId = numBatchesWritten++; + ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId); + HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( + Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]); + encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx); + EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, + false); + byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); + ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, + DataBlockEncoding.ID_SIZE, onDiskBytes.length + - DataBlockEncoding.ID_SIZE); + verifySeeking(seeker, readBuffer, batchId); + } + + private void verifySeeking(EncodedSeeker encodeSeeker, + ByteBuffer encodedData, int batchId) { + List kvList = new ArrayList(); + for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { + kvList.clear(); + encodeSeeker.setCurrentBuffer(encodedData); + KeyValue firstOnRow = KeyValue.createFirstOnRow(getRowKey(batchId, i)); + encodeSeeker.seekToKeyInBlock(firstOnRow.getBuffer(), + firstOnRow.getKeyOffset(), firstOnRow.getKeyLength(), false); + boolean hasMoreOfEncodeScanner = encodeSeeker.next(); + CollectionBackedScanner collectionScanner = new CollectionBackedScanner( + this.kvset); + boolean hasMoreOfCollectionScanner = collectionScanner.seek(firstOnRow); + if (hasMoreOfEncodeScanner != hasMoreOfCollectionScanner) { + dumpInputKVSet(); + fail("Get error result after seeking " + firstOnRow); + } + if (hasMoreOfEncodeScanner) { + if (KeyValue.COMPARATOR.compare(encodeSeeker.getKeyValue(), + collectionScanner.peek()) != 0) { + dumpInputKVSet(); + fail("Expected " + collectionScanner.peek() + " actual " + + encodeSeeker.getKeyValue() + ", after seeking " + firstOnRow); + } + } + } + } + + private void dumpInputKVSet() { + LOG.info("Dumping input keyvalue set in error case:"); + for (KeyValue kv : kvset) { + System.out.println(kv); + } + } + + private static ByteBuffer generateFixedTestData( + ConcurrentSkipListSet kvset, int batchId) throws Exception { + ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); + DataOutputStream userDataStream = new DataOutputStream(baosInMemory); + for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { + if (i / 10 % 2 == 1) continue; + for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { + KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, + getQualifier(j), getValue(batchId, i, j)); + kvset.add(kv); + } + } + for (KeyValue kv : kvset) { + userDataStream.writeInt(kv.getKeyLength()); + userDataStream.writeInt(kv.getValueLength()); + userDataStream + .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()); + userDataStream.write(kv.getBuffer(), kv.getValueOffset(), + kv.getValueLength()); + } + return ByteBuffer.wrap(baosInMemory.toByteArray()); + } + + private static ByteBuffer generateRandomTestData( + ConcurrentSkipListSet kvset, int batchId) throws Exception { + ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); + DataOutputStream userDataStream = new DataOutputStream(baosInMemory); + Random random = new Random(); + for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { + if (random.nextInt(100) < 50) continue; + for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { + if (random.nextInt(100) < 50) continue; + KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, + getQualifier(j), getValue(batchId, i, j)); + kvset.add(kv); + } + } + for (KeyValue kv : kvset) { + userDataStream.writeInt(kv.getKeyLength()); + userDataStream.writeInt(kv.getValueLength()); + userDataStream + .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()); + userDataStream.write(kv.getBuffer(), kv.getValueOffset(), + kv.getValueLength()); + } + return ByteBuffer.wrap(baosInMemory.toByteArray()); + } + + private static byte[] getRowKey(int batchId, int i) { + return Bytes.toBytes("batch" + batchId + "_row" + i); + } + + private static byte[] getQualifier(int j) { + return Bytes.toBytes("col" + j); + } + + private static byte[] getValue(int batchId, int i, int j) { + return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) + + "_col" + j); + } + +}