From acc4d13e7f7b8261c2d5d3ef0cb7ae8031363125 Mon Sep 17 00:00:00 2001 From: chenglei Date: Thu, 17 Nov 2022 23:13:12 +0800 Subject: [PATCH] HBASE-27464 In memory compaction 'COMPACT' may cause data corruption when adding cells large than maxAlloc(default 256k) size (#4881) Co-authored-by: comnetwork Signed-off-by: Duo Zhang (cherry picked from commit 7b0ac6451d6ca51c310c41a194628f90e3815c17) --- .../CellChunkImmutableSegment.java | 7 +- .../hadoop/hbase/regionserver/TestHStore.java | 102 ++++++++++++++++++ 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkImmutableSegment.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkImmutableSegment.java index fb821fb5681..de6377668f9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkImmutableSegment.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkImmutableSegment.java @@ -159,8 +159,11 @@ public class CellChunkImmutableSegment extends ImmutableSegment { offsetInCurentChunk = ChunkCreator.SIZEOF_CHUNK_HEADER; } if (action == MemStoreCompactionStrategy.Action.COMPACT && !alreadyCopied) { - // for compaction copy cell to the new segment (MSLAB copy) - c = maybeCloneWithAllocator(c, false); + + // For compaction copy cell to the new segment (MSLAB copy),here we set forceCloneOfBigCell + // to true, because the chunk which the cell is allocated may be freed after the compaction + // is completed, see HBASE-27464. + c = maybeCloneWithAllocator(c, true); } offsetInCurentChunk = // add the Cell reference to the index chunk createCellReference((ByteBufferKeyValue) c, chunks[currentChunkIdx].getData(), diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java index 78aa83737ae..62fba4b6c30 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java @@ -68,6 +68,7 @@ import org.apache.hadoop.hbase.CellBuilderType; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.ExtendedCell; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -97,6 +98,7 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.quotas.RegionSizeStoreImpl; +import org.apache.hadoop.hbase.regionserver.ChunkCreator.ChunkType; import org.apache.hadoop.hbase.regionserver.MemStoreCompactionStrategy.Action; import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration; import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; @@ -1807,6 +1809,106 @@ public class TestHStore { myCompactingMemStore.inMemoryCompactionEndCyclicBarrier.await(); } + /** + * This test is for HBASE-27464, before this JIRA,when init {@link CellChunkImmutableSegment} for + * 'COMPACT' action, we not force copy to current MSLab. When cell size bigger than + * {@link MemStoreLABImpl#maxAlloc}, cell will stay in previous chunk which will recycle after + * segment replace, and we may read wrong data when these chunk reused by others. + */ + @Test + public void testForceCloneOfBigCellForCellChunkImmutableSegment() throws Exception { + Configuration conf = HBaseConfiguration.create(); + int maxAllocByteSize = conf.getInt(MemStoreLAB.MAX_ALLOC_KEY, MemStoreLAB.MAX_ALLOC_DEFAULT); + + // Construct big cell,which is large than {@link MemStoreLABImpl#maxAlloc}. + byte[] cellValue = new byte[maxAllocByteSize + 1]; + final long timestamp = EnvironmentEdgeManager.currentTime(); + final long seqId = 100; + final byte[] rowKey1 = Bytes.toBytes("rowKey1"); + final Cell originalCell1 = createCell(rowKey1, qf1, timestamp, seqId, cellValue); + final byte[] rowKey2 = Bytes.toBytes("rowKey2"); + final Cell originalCell2 = createCell(rowKey2, qf1, timestamp, seqId, cellValue); + TreeSet quals = new TreeSet<>(Bytes.BYTES_COMPARATOR); + quals.add(qf1); + + int cellByteSize = MutableSegment.getCellLength(originalCell1); + int inMemoryFlushByteSize = cellByteSize - 1; + + // set CompactingMemStore.inmemoryFlushSize to flushByteSize. + conf.set(HStore.MEMSTORE_CLASS_NAME, MyCompactingMemStore6.class.getName()); + conf.setDouble(CompactingMemStore.IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY, 0.005); + conf.set(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, String.valueOf(inMemoryFlushByteSize * 200)); + conf.setBoolean(WALFactory.WAL_ENABLED, false); + + // Use {@link MemoryCompactionPolicy#EAGER} for always compacting. + init(name.getMethodName(), conf, ColumnFamilyDescriptorBuilder.newBuilder(family) + .setInMemoryCompaction(MemoryCompactionPolicy.EAGER).build()); + + MyCompactingMemStore6 myCompactingMemStore = ((MyCompactingMemStore6) store.memstore); + assertTrue((int) (myCompactingMemStore.getInmemoryFlushSize()) == inMemoryFlushByteSize); + + // Data chunk Pool is disabled. + assertTrue(ChunkCreator.getInstance().getMaxCount(ChunkType.DATA_CHUNK) == 0); + + MemStoreSizing memStoreSizing = new NonThreadSafeMemStoreSizing(); + + // First compact + store.add(originalCell1, memStoreSizing); + // Waiting for the first in-memory compaction finished + myCompactingMemStore.inMemoryCompactionEndCyclicBarrier.await(); + + StoreScanner storeScanner = + (StoreScanner) store.getScanner(new Scan(new Get(rowKey1)), quals, seqId + 1); + SegmentScanner segmentScanner = getTypeKeyValueScanner(storeScanner, SegmentScanner.class); + Cell resultCell1 = segmentScanner.next(); + assertTrue(CellUtil.equals(resultCell1, originalCell1)); + int cell1ChunkId = ((ExtendedCell) resultCell1).getChunkId(); + assertTrue(cell1ChunkId != ExtendedCell.CELL_NOT_BASED_ON_CHUNK); + assertNull(segmentScanner.next()); + segmentScanner.close(); + storeScanner.close(); + Segment segment = segmentScanner.segment; + assertTrue(segment instanceof CellChunkImmutableSegment); + MemStoreLABImpl memStoreLAB1 = (MemStoreLABImpl) (segmentScanner.segment.getMemStoreLAB()); + assertTrue(!memStoreLAB1.isClosed()); + assertTrue(!memStoreLAB1.chunks.isEmpty()); + assertTrue(!memStoreLAB1.isReclaimed()); + + // Second compact + store.add(originalCell2, memStoreSizing); + // Waiting for the second in-memory compaction finished + myCompactingMemStore.inMemoryCompactionEndCyclicBarrier.await(); + + // Before HBASE-27464, here may throw java.lang.IllegalArgumentException: In CellChunkMap, cell + // must be associated with chunk.. We were looking for a cell at index 0. + // The cause for this exception is because the data chunk Pool is disabled,when the data chunks + // are recycled after the second in-memory compaction finished,the + // {@link ChunkCreator.putbackChunks} method does not put the chunks back to the data chunk + // pool,it just removes them from {@link ChunkCreator#chunkIdMap},so in + // {@link CellChunkMap#getCell} we could not get the data chunk by chunkId. + storeScanner = (StoreScanner) store.getScanner(new Scan(new Get(rowKey1)), quals, seqId + 1); + segmentScanner = getTypeKeyValueScanner(storeScanner, SegmentScanner.class); + Cell newResultCell1 = segmentScanner.next(); + assertTrue(newResultCell1 != resultCell1); + assertTrue(CellUtil.equals(newResultCell1, originalCell1)); + + Cell resultCell2 = segmentScanner.next(); + assertTrue(CellUtil.equals(resultCell2, originalCell2)); + assertNull(segmentScanner.next()); + segmentScanner.close(); + storeScanner.close(); + + segment = segmentScanner.segment; + assertTrue(segment instanceof CellChunkImmutableSegment); + MemStoreLABImpl memStoreLAB2 = (MemStoreLABImpl) (segmentScanner.segment.getMemStoreLAB()); + assertTrue(!memStoreLAB2.isClosed()); + assertTrue(!memStoreLAB2.chunks.isEmpty()); + assertTrue(!memStoreLAB2.isReclaimed()); + assertTrue(memStoreLAB1.isClosed()); + assertTrue(memStoreLAB1.chunks.isEmpty()); + assertTrue(memStoreLAB1.isReclaimed()); + } + // This test is for HBASE-26210 also, test write large cell and small cell concurrently when // InmemoryFlushSize is smaller,equal with and larger than cell size. @Test