HBASE-26567 Remove IndexType from ChunkCreator (#3947)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
chenglei 2022-01-05 00:05:54 +08:00 committed by Duo Zhang
parent 10eea31ca9
commit fb1c88adac
5 changed files with 114 additions and 88 deletions

View File

@ -28,6 +28,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.HeapMemoryTuneObserver;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.StringUtils;
@ -134,35 +135,20 @@ public class ChunkCreator {
return instance;
}
/**
* Creates and inits a chunk. The default implementation for a specific chunk size.
* @return the chunk that was initialized
*/
Chunk getChunk(ChunkType chunkType) {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, chunkType);
}
/**
* Creates and inits a chunk. The default implementation.
* Creates and inits a data chunk. The default implementation.
* @return the chunk that was initialized
*/
Chunk getChunk() {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, ChunkType.DATA_CHUNK);
return getChunk(ChunkType.DATA_CHUNK);
}
/**
* Creates and inits a chunk. The default implementation for a specific index type.
* Creates and inits a chunk with specific type.
* @return the chunk that was initialized
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
return getChunk(chunkIndexType, ChunkType.DATA_CHUNK);
}
/**
* Creates and inits a chunk with specific index type and type.
* @return the chunk that was initialized
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType) {
Chunk getChunk(ChunkType chunkType) {
switch (chunkType) {
case INDEX_CHUNK:
if (indexChunksPool == null) {
@ -170,15 +156,15 @@ public class ChunkCreator {
throw new IllegalArgumentException(
"chunkType is INDEX_CHUNK but indexChunkSize is:[" + this.indexChunkSize + "]");
}
return getChunk(chunkIndexType, chunkType, indexChunkSize);
return getChunk(chunkType, indexChunkSize);
} else {
return getChunk(chunkIndexType, chunkType, indexChunksPool.getChunkSize());
return getChunk(chunkType, indexChunksPool.getChunkSize());
}
case DATA_CHUNK:
if (dataChunksPool == null) {
return getChunk(chunkIndexType, chunkType, chunkSize);
return getChunk(chunkType, chunkSize);
} else {
return getChunk(chunkIndexType, chunkType, dataChunksPool.getChunkSize());
return getChunk(chunkType, dataChunksPool.getChunkSize());
}
default:
throw new IllegalArgumentException(
@ -189,10 +175,9 @@ public class ChunkCreator {
/**
* Creates and inits a chunk.
* @return the chunk that was initialized
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
* @param size the size of the chunk to be allocated, in bytes
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType, int size) {
Chunk getChunk(ChunkType chunkType, int size) {
Chunk chunk = null;
MemStoreChunkPool pool = null;
@ -217,9 +202,7 @@ public class ChunkCreator {
}
if (chunk == null) {
// the second parameter explains whether CellChunkMap index is requested,
// in that case, put allocated on demand chunk mapping into chunkIdMap
chunk = createChunk(false, chunkIndexType, chunkType, size);
chunk = createChunk(false, chunkType, size);
}
// now we need to actually do the expensive memory allocation step in case of a new chunk,
@ -240,22 +223,21 @@ public class ChunkCreator {
if (allocSize <= this.getChunkSize(ChunkType.DATA_CHUNK)) {
LOG.warn("Jumbo chunk size " + jumboSize + " must be more than regular chunk size "
+ this.getChunkSize(ChunkType.DATA_CHUNK) + ". Converting to regular chunk.");
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
return getChunk();
}
// the new chunk is going to hold the jumbo cell data and needs to be referenced by
// a strong map. Therefore the CCM index type
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK, allocSize);
// a strong map.
return getChunk(ChunkType.JUMBO_CHUNK, allocSize);
}
/**
* Creates the chunk either onheap or offheap
* @param pool indicates if the chunks have to be created which will be used by the Pool
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
* @param chunkType whether the requested chunk is data chunk or index chunk.
* @param size the size of the chunk to be allocated, in bytes
* @return the chunk
*/
private Chunk createChunk(boolean pool, CompactingMemStore.IndexType chunkIndexType,
ChunkType chunkType, int size) {
private Chunk createChunk(boolean pool, ChunkType chunkType, int size) {
Chunk chunk = null;
int id = chunkID.getAndIncrement();
assert id > 0;
@ -265,22 +247,39 @@ public class ChunkCreator {
} else {
chunk = new OnheapChunk(size, id, chunkType, pool);
}
if (pool || (chunkIndexType == CompactingMemStore.IndexType.CHUNK_MAP)) {
// put the pool chunk into the chunkIdMap so it is not GC-ed
this.chunkIdMap.put(chunk.getId(), chunk);
}
/**
* Here we always put the chunk into the {@link ChunkCreator#chunkIdMap} no matter whether the
* chunk is pooled or not. <br/>
* For {@link CompactingMemStore},because the chunk could only be acquired from
* {@link ChunkCreator} through {@link MemStoreLABImpl}, and
* {@link CompactingMemStore#indexType} could only be {@link IndexType.CHUNK_MAP} when using
* {@link MemStoreLABImpl}, so we must put chunk into this {@link ChunkCreator#chunkIdMap} to
* make sure the chunk could be got by chunkId.
* <p>
* For {@link DefaultMemStore},it is also reasonable to put the chunk in
* {@link ChunkCreator#chunkIdMap} because: <br/>
* 1.When the {@link MemStoreLAB} which created the chunk is not closed, this chunk is used by
* the {@link Segment} which references this {@link MemStoreLAB}, so this chunk certainly should
* not be GC-ed, putting the chunk in {@link ChunkCreator#chunkIdMap} does not prevent useless
* chunk to be GC-ed. <br/>
* 2.When the {@link MemStoreLAB} which created the chunk is closed, and if the chunk is not
* pooled, {@link ChunkCreator#removeChunk} is invoked to remove the chunk from this
* {@link ChunkCreator#chunkIdMap}, so there is no memory leak.
*/
this.chunkIdMap.put(chunk.getId(), chunk);
return chunk;
}
// Chunks from pool are created covered with strong references anyway
// TODO: change to CHUNK_MAP if it is generally defined
private Chunk createChunkForPool(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType,
// Chunks from pool are created covered with strong references anyway.
private Chunk createChunkForPool(ChunkType chunkType,
int chunkSize) {
if (chunkSize != dataChunksPool.getChunkSize() &&
chunkSize != indexChunksPool.getChunkSize()) {
return null;
}
return createChunk(true, chunkIndexType, chunkType, chunkSize);
return createChunk(true, chunkType, chunkSize);
}
// Used to translate the ChunkID into a chunk ref
@ -346,7 +345,7 @@ public class ChunkCreator {
this.reclaimedChunks = new LinkedBlockingQueue<>();
for (int i = 0; i < initialCount; i++) {
Chunk chunk =
createChunk(true, CompactingMemStore.IndexType.ARRAY_MAP, chunkType, chunkSize);
createChunk(true, chunkType, chunkSize);
chunk.init();
reclaimedChunks.add(chunk);
}
@ -368,10 +367,6 @@ public class ChunkCreator {
* @see #putbackChunks(Chunk)
*/
Chunk getChunk() {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP);
}
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
Chunk chunk = reclaimedChunks.poll();
if (chunk != null) {
chunk.reset();
@ -382,7 +377,7 @@ public class ChunkCreator {
long created = this.chunkCount.get();
if (created < this.maxCount) {
if (this.chunkCount.compareAndSet(created, created + 1)) {
chunk = createChunkForPool(chunkIndexType, chunkType, chunkSize);
chunk = createChunkForPool(chunkType, chunkSize);
break;
}
} else {
@ -559,7 +554,7 @@ public class ChunkCreator {
boolean isChunkInPool(int chunkId) {
Chunk c = getChunk(chunkId);
if (c==null) {
if (c == null) {
return false;
}

View File

@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.nio.RefCnt;
import org.apache.hadoop.hbase.regionserver.CompactingMemStore.IndexType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -42,27 +43,28 @@ import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
/**
* A memstore-local allocation buffer.
* <p>
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates
* big (2MB) byte[] chunks from and then doles it out to threads that request
* slices into the array.
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates big (2MB) byte[] chunks
* from and then doles it out to threads that request slices into the array.
* <p>
* The purpose of this class is to combat heap fragmentation in the
* regionserver. By ensuring that all Cells in a given memstore refer
* only to large chunks of contiguous memory, we ensure that large blocks
* get freed up when the memstore is flushed.
* The purpose of this class is to combat heap fragmentation in the regionserver. By ensuring that
* all Cells in a given memstore refer only to large chunks of contiguous memory, we ensure that
* large blocks get freed up when the memstore is flushed.
* <p>
* Without the MSLAB, the byte array allocated during insertion end up
* interleaved throughout the heap, and the old generation gets progressively
* more fragmented until a stop-the-world compacting collection occurs.
* Without the MSLAB, the byte array allocated during insertion end up interleaved throughout the
* heap, and the old generation gets progressively more fragmented until a stop-the-world compacting
* collection occurs.
* <p>
* TODO: we should probably benchmark whether word-aligning the allocations
* would provide a performance improvement - probably would speed up the
* Bytes.toLong/Bytes.toInt calls in KeyValue, but some of those are cached
* anyway.
* The chunks created by this MemStoreLAB can get pooled at {@link ChunkCreator}.
* When the Chunk comes from pool, it can be either an on heap or an off heap backed chunk. The chunks,
* which this MemStoreLAB creates on its own (when no chunk available from pool), those will be
* always on heap backed.
* TODO: we should probably benchmark whether word-aligning the allocations would provide a
* performance improvement - probably would speed up the Bytes.toLong/Bytes.toInt calls in KeyValue,
* but some of those are cached anyway. The chunks created by this MemStoreLAB can get pooled at
* {@link ChunkCreator}. When the Chunk comes from pool, it can be either an on heap or an off heap
* backed chunk. The chunks, which this MemStoreLAB creates on its own (when no chunk available from
* pool), those will be always on heap backed.
* <p>
* NOTE:if user requested to work with MSLABs (whether on- or off-heap), in
* {@link CompactingMemStore} ctor, the {@link CompactingMemStore#indexType} could only be
* {@link IndexType#CHUNK_MAP},that is to say the immutable segments using MSLABs are going to use
* {@link CellChunkMap} as their index.
*/
@InterfaceAudience.Private
public class MemStoreLABImpl implements MemStoreLAB {
@ -78,7 +80,6 @@ public class MemStoreLABImpl implements MemStoreLAB {
private final int dataChunkSize;
private final int maxAlloc;
private final ChunkCreator chunkCreator;
private final CompactingMemStore.IndexType idxType; // what index is used for corresponding segment
// This flag is for closing this instance, its set when clearing snapshot of
// memstore
@ -104,13 +105,11 @@ public class MemStoreLABImpl implements MemStoreLAB {
// if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one!
Preconditions.checkArgument(maxAlloc <= dataChunkSize,
MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY);
this.refCnt = RefCnt.create(() -> {
recycleChunks();
});
// if user requested to work with MSLABs (whether on- or off-heap), then the
// immutable segments are going to use CellChunkMap as their index
idxType = CompactingMemStore.IndexType.CHUNK_MAP;
}
@Override
@ -339,7 +338,7 @@ public class MemStoreLABImpl implements MemStoreLAB {
if (c != null) {
return c;
}
c = this.chunkCreator.getChunk(idxType);
c = this.chunkCreator.getChunk();
if (c != null) {
// set the curChunk. No need of CAS as only one thread will be here
currChunk.set(c);

View File

@ -282,8 +282,8 @@ public class TestCellFlatSet {
// allocate new chunks and use the data chunk to hold the full data of the cells
// and the index chunk to hold the cell-representations
Chunk dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk dataChunk = chunkCreator.getChunk();
Chunk idxChunk = chunkCreator.getChunk();
// the array of index chunks to be used as a basis for CellChunkMap
Chunk chunkArray[] = new Chunk[8]; // according to test currently written 8 is way enough
int chunkArrayIdx = 0;
@ -300,7 +300,7 @@ public class TestCellFlatSet {
// do we have enough space to write the cell data on the data chunk?
if (dataOffset + kv.getSerializedSize() > chunkCreator.getChunkSize()) {
// allocate more data chunks if needed
dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
dataChunk = chunkCreator.getChunk();
dataBuffer = dataChunk.getData();
dataOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
}
@ -310,7 +310,7 @@ public class TestCellFlatSet {
// do we have enough space to write the cell-representation on the index chunk?
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
// allocate more index chunks if needed
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
idxChunk = chunkCreator.getChunk();
idxBuffer = idxChunk.getData();
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
chunkArray[chunkArrayIdx++] = idxChunk;
@ -331,10 +331,10 @@ public class TestCellFlatSet {
// allocate new chunks and use the data JUMBO chunk to hold the full data of the cells
// and the normal index chunk to hold the cell-representations
Chunk dataJumboChunk =
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
smallChunkSize);
assertTrue(dataJumboChunk.isJumbo());
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk idxChunk = chunkCreator.getChunk();
// the array of index chunks to be used as a basis for CellChunkMap
Chunk[] chunkArray = new Chunk[8]; // according to test currently written 8 is way enough
int chunkArrayIdx = 0;
@ -354,7 +354,7 @@ public class TestCellFlatSet {
// do we have enough space to write the cell-representation on the index chunk?
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
// allocate more index chunks if needed
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
idxChunk = chunkCreator.getChunk();
idxBuffer = idxChunk.getData();
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
chunkArray[chunkArrayIdx++] = idxChunk;
@ -368,7 +368,7 @@ public class TestCellFlatSet {
// Jumbo chunks are working only with one cell per chunk, thus always allocate a new jumbo
// data chunk for next cell
dataJumboChunk =
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
smallChunkSize);
assertTrue(dataJumboChunk.isJumbo());
dataBuffer = dataJumboChunk.getData();

View File

@ -312,7 +312,7 @@ public class TestMemStoreChunkPool {
assertEquals(initialCount, newCreator.getPoolSize());
assertEquals(0, newCreator.getPoolSize(ChunkType.INDEX_CHUNK));
Chunk dataChunk = newCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk dataChunk = newCreator.getChunk();
assertTrue(dataChunk.isDataChunk());
assertTrue(dataChunk.isFromPool());
assertEquals(initialCount - 1, newCreator.getPoolSize());
@ -323,7 +323,7 @@ public class TestMemStoreChunkPool {
// We set ChunkCreator.indexChunkSize to 0, but we want to get a IndexChunk
try {
newCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.INDEX_CHUNK);
newCreator.getChunk(ChunkType.INDEX_CHUNK);
fail();
} catch (IllegalArgumentException e) {
}
@ -350,7 +350,7 @@ public class TestMemStoreChunkPool {
assertEquals(0, newCreator.getPoolSize());
assertEquals(0, newCreator.getPoolSize(ChunkType.INDEX_CHUNK));
dataChunk = newCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
dataChunk = newCreator.getChunk();
assertTrue(dataChunk.isDataChunk());
assertTrue(!dataChunk.isFromPool());
assertEquals(0, newCreator.getPoolSize());
@ -358,7 +358,7 @@ public class TestMemStoreChunkPool {
try {
// We set ChunkCreator.indexChunkSize to 0, but we want to get a IndexChunk
newCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.INDEX_CHUNK);
newCreator.getChunk(ChunkType.INDEX_CHUNK);
fail();
} catch (IllegalArgumentException e) {
}
@ -387,14 +387,14 @@ public class TestMemStoreChunkPool {
assertEquals(0, newCreator.getPoolSize());
assertEquals(initialCount, newCreator.getPoolSize(ChunkType.INDEX_CHUNK));
dataChunk = newCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
dataChunk = newCreator.getChunk();
assertTrue(dataChunk.isDataChunk());
assertTrue(!dataChunk.isFromPool());
assertEquals(0, newCreator.getPoolSize());
assertEquals(initialCount, newCreator.getPoolSize(ChunkType.INDEX_CHUNK));
Chunk indexChunk =
newCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.INDEX_CHUNK);
newCreator.getChunk(ChunkType.INDEX_CHUNK);
assertEquals(0, newCreator.getPoolSize());
assertEquals(initialCount - 1, newCreator.getPoolSize(ChunkType.INDEX_CHUNK));
assertTrue(indexChunk.isIndexChunk());
@ -415,10 +415,10 @@ public class TestMemStoreChunkPool {
// Test both dataChunksPool and indexChunksPool are not null
assertTrue(ChunkCreator.getInstance().getDataChunksPool() != null);
assertTrue(ChunkCreator.getInstance().getIndexChunksPool() != null);
Chunk dataChunk = ChunkCreator.getInstance().getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk dataChunk = ChunkCreator.getInstance().getChunk();
assertTrue(dataChunk.isDataChunk());
assertTrue(dataChunk.isFromPool());
Chunk indexChunk = ChunkCreator.getInstance().getChunk(CompactingMemStore.IndexType.CHUNK_MAP,
Chunk indexChunk = ChunkCreator.getInstance().getChunk(
ChunkType.INDEX_CHUNK);
assertTrue(indexChunk.isIndexChunk());
assertTrue(indexChunk.isFromPool());

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MultithreadedTestUtil;
import org.apache.hadoop.hbase.MultithreadedTestUtil.TestThread;
import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
import org.apache.hadoop.hbase.regionserver.ChunkCreator.ChunkType;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
@ -298,6 +299,37 @@ public class TestMemStoreLAB {
.currentTime(), bigValue);
assertEquals(bigKV.getSerializedSize(),
mslab.forceCopyOfBigCellInto(bigKV).getSerializedSize());
/**
* Add test by HBASE-26576,all the chunks are in {@link ChunkCreator#chunkIdMap}
*/
assertTrue(mslab.chunks.size() == 2);
Chunk dataChunk = null;
Chunk jumboChunk = null;
for (Integer chunkId : mslab.chunks) {
Chunk chunk = ChunkCreator.getInstance().getChunk(chunkId);
assertTrue(chunk != null);
if (chunk.getChunkType() == ChunkType.JUMBO_CHUNK) {
jumboChunk = chunk;
} else if (chunk.getChunkType() == ChunkType.DATA_CHUNK) {
dataChunk = chunk;
}
}
assertTrue(dataChunk != null);
assertTrue(jumboChunk != null);
mslab.close();
/**
* After mslab close, jumboChunk is removed from {@link ChunkCreator#chunkIdMap} but because
* dataChunk is recycled to pool so it is still in {@link ChunkCreator#chunkIdMap}.
*/
assertTrue(ChunkCreator.getInstance().getChunk(jumboChunk.getId()) == null);
assertTrue(!ChunkCreator.getInstance().isChunkInPool(jumboChunk.getId()));
assertTrue(ChunkCreator.getInstance().getChunk(dataChunk.getId()) == dataChunk);
assertTrue(ChunkCreator.getInstance().isChunkInPool(dataChunk.getId()));
}
private Thread getChunkQueueTestThread(final MemStoreLABImpl mslab, String threadName,