From 95039f08b1b9fcba486c9d68cd88a36af0e9821e Mon Sep 17 00:00:00 2001 From: Bryan Beaudreault Date: Sat, 4 Feb 2023 12:12:29 -0500 Subject: [PATCH] HBASE-27580 Reverse scan over rows with tags throw exceptions when using DataBlockEncoding (#5006) Signed-off-by: Duo Zhang --- .../io/encoding/BufferedDataBlockEncoder.java | 11 ++- .../hadoop/hbase/regionserver/TestTags.java | 71 +++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java index 4908b927bfe..a88d9fbdc16 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java @@ -792,6 +792,15 @@ abstract class BufferedDataBlockEncoder extends AbstractDataBlockEncoder { currentBuffer.rewind(); if (tagCompressionContext != null) { tagCompressionContext.clear(); + // Prior seekToKeyInBlock may have reset this to false if we fell back to previous + // seeker state. This is an optimization so we don't have to uncompress tags again when + // reading last state. + // In case of rewind, we are starting from the beginning of the buffer, so we need + // to uncompress any tags we see. + // It may make sense to reset this in setCurrentBuffer as well, but we seem to only call + // setCurrentBuffer after StoreFileScanner.seekAtOrAfter which calls next to consume the + // seeker state. Rewind is called by seekBefore, which doesn't and leaves us in this state. + current.uncompressTags = true; } decodeFirst(); current.setKey(current.keyBuffer, current.memstoreTS); @@ -818,7 +827,7 @@ abstract class BufferedDataBlockEncoder extends AbstractDataBlockEncoder { try { current.tagsCompressedLength = tagCompressionContext.uncompressTags(currentBuffer, current.tagsBuffer, 0, current.tagsLength); - } catch (IOException e) { + } catch (Exception e) { throw new RuntimeException("Exception while uncompressing tags", e); } } else { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java index a263b0d1955..3d45fc56503 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java @@ -41,6 +41,8 @@ import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.CompactionState; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.client.Increment; import org.apache.hadoop.hbase.client.Mutation; @@ -69,6 +71,8 @@ import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class that test tags @@ -79,6 +83,8 @@ public class TestTags { @ClassRule public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestTags.class); + private static final Logger LOG = LoggerFactory.getLogger(TestTags.class); + static boolean useFilter = false; private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); @@ -105,6 +111,71 @@ public class TestTags { useFilter = false; } + /** + * Test that we can do reverse scans when writing tags and using DataBlockEncoding. Fails with an + * exception for PREFIX, DIFF, and FAST_DIFF prior to HBASE-27580 + */ + @Test + public void testReverseScanWithDBE() throws IOException { + byte[] family = Bytes.toBytes("0"); + + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); + + try (Connection connection = ConnectionFactory.createConnection(conf)) { + for (DataBlockEncoding encoding : DataBlockEncoding.values()) { + testReverseScanWithDBE(connection, encoding, family); + } + } + } + + private void testReverseScanWithDBE(Connection conn, DataBlockEncoding encoding, byte[] family) + throws IOException { + LOG.info("Running test with DBE={}", encoding); + TableName tableName = TableName.valueOf(TEST_NAME.getMethodName() + "-" + encoding); + TEST_UTIL.createTable(TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily( + ColumnFamilyDescriptorBuilder.newBuilder(family).setDataBlockEncoding(encoding).build()) + .build(), null); + + Table table = conn.getTable(tableName); + + int maxRows = 10; + byte[] val1 = new byte[10]; + byte[] val2 = new byte[10]; + Bytes.random(val1); + Bytes.random(val2); + + for (int i = 0; i < maxRows; i++) { + if (i == maxRows / 2) { + TEST_UTIL.flush(tableName); + } + table.put(new Put(Bytes.toBytes(i)).addColumn(family, Bytes.toBytes(1), val1) + .addColumn(family, Bytes.toBytes(2), val2).setTTL(600_000)); + } + + TEST_UTIL.flush(table.getName()); + + Scan scan = new Scan(); + scan.setReversed(true); + + try (ResultScanner scanner = table.getScanner(scan)) { + for (int i = maxRows - 1; i >= 0; i--) { + Result row = scanner.next(); + assertEquals(2, row.size()); + + Cell cell1 = row.getColumnLatestCell(family, Bytes.toBytes(1)); + assertTrue(CellUtil.matchingRows(cell1, Bytes.toBytes(i))); + assertTrue(CellUtil.matchingValue(cell1, val1)); + + Cell cell2 = row.getColumnLatestCell(family, Bytes.toBytes(2)); + assertTrue(CellUtil.matchingRows(cell2, Bytes.toBytes(i))); + assertTrue(CellUtil.matchingValue(cell2, val2)); + } + } + + } + @Test public void testTags() throws Exception { Table table = null;