HBASE-16436 Adding CellChunkMap code, its tests and fixes to all code review comments
This commit is contained in:
parent
305ffcb040
commit
5cdaca5c00
|
@ -0,0 +1,127 @@
|
|||
/**
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
|
||||
/**
|
||||
* CellChunkMap is an array of serialized representations of Cell
|
||||
* (pointing to Chunks with full Cell data) and can be allocated both off-heap and on-heap.
|
||||
*
|
||||
* CellChunkMap is a byte array (chunk) holding all that is needed to access a Cell, which
|
||||
* is actually saved on another deeper chunk.
|
||||
* Per Cell we have a reference to this deeper byte array B (chunk ID, integer),
|
||||
* offset in bytes in B (integer), length in bytes in B (integer) and seqID of the cell (long).
|
||||
* In order to save reference to byte array we use the Chunk's ID given by ChunkCreator.
|
||||
*
|
||||
* The CellChunkMap memory layout on chunk A relevant to a deeper byte array B,
|
||||
* holding the actual cell data:
|
||||
*
|
||||
* < header > <--------------- first Cell -----------------> <-- second Cell ...
|
||||
* --------------------------------------------------------------------------------------- ...
|
||||
* integer | integer | integer | integer | long |
|
||||
* 4 bytes | 4 bytes | 4 bytes | 4 bytes | 8 bytes |
|
||||
* ChunkID | chunkID of | offset in B | length of | sequence | ...
|
||||
* of this | chunk B with | where Cell's | Cell's | ID of |
|
||||
* chunk A | Cell data | data starts | data in B | the Cell |
|
||||
* --------------------------------------------------------------------------------------- ...
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class CellChunkMap extends CellFlatMap {
|
||||
|
||||
private final Chunk[] chunks; // the array of chunks, on which the index is based
|
||||
private final int numOfCellsInsideChunk; // constant number of cell-representations in a chunk
|
||||
|
||||
// each cell-representation requires three integers for chunkID (reference to the ByteBuffer),
|
||||
// offset and length, and one long for seqID
|
||||
public static final int SIZEOF_CELL_REP = 3*Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG ;
|
||||
|
||||
/**
|
||||
* C-tor for creating CellChunkMap from existing Chunk array, which must be ordered
|
||||
* (decreasingly or increasingly according to parameter "descending")
|
||||
* @param comparator a tool for comparing cells
|
||||
* @param chunks ordered array of index chunk with cell representations
|
||||
* @param min the index of the first cell (usually 0)
|
||||
* @param max number of Cells or the index of the cell after the maximal cell
|
||||
* @param descending the order of the given array
|
||||
*/
|
||||
public CellChunkMap(Comparator<? super Cell> comparator,
|
||||
Chunk[] chunks, int min, int max, boolean descending) {
|
||||
super(comparator, min, max, descending);
|
||||
this.chunks = chunks;
|
||||
this.numOfCellsInsideChunk = // each chunk starts with its own ID following the cells data
|
||||
(ChunkCreator.getInstance().getChunkSize() - Bytes.SIZEOF_INT) / SIZEOF_CELL_REP;
|
||||
|
||||
}
|
||||
|
||||
/* To be used by base (CellFlatMap) class only to create a sub-CellFlatMap
|
||||
* Should be used only to create only CellChunkMap from CellChunkMap */
|
||||
@Override
|
||||
protected CellFlatMap createSubCellFlatMap(int min, int max, boolean descending) {
|
||||
return new CellChunkMap(this.comparator(), this.chunks, min, max, descending);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected Cell getCell(int i) {
|
||||
// get the index of the relevant chunk inside chunk array
|
||||
int chunkIndex = (i / numOfCellsInsideChunk);
|
||||
ByteBuffer block = chunks[chunkIndex].getData();// get the ByteBuffer of the relevant chunk
|
||||
int j = i - chunkIndex * numOfCellsInsideChunk; // get the index of the cell-representation
|
||||
|
||||
// find inside the offset inside the chunk holding the index, skip bytes for chunk id
|
||||
int offsetInBytes = Bytes.SIZEOF_INT + j* SIZEOF_CELL_REP;
|
||||
|
||||
|
||||
// find the chunk holding the data of the cell, the chunkID is stored first
|
||||
int chunkId = ByteBufferUtils.toInt(block, offsetInBytes);
|
||||
Chunk chunk = ChunkCreator.getInstance().getChunk(chunkId);
|
||||
if (chunk == null) {
|
||||
// this should not happen, putting an assertion here at least for the testing period
|
||||
assert false;
|
||||
}
|
||||
|
||||
// find the offset of the data of the cell, skip integer for chunkID, offset is stored second
|
||||
int offsetOfCell = ByteBufferUtils.toInt(block, offsetInBytes + Bytes.SIZEOF_INT);
|
||||
// find the length of the data of the cell, skip two integers for chunkID and offset,
|
||||
// length is stored third
|
||||
int lengthOfCell = ByteBufferUtils.toInt(block, offsetInBytes + 2*Bytes.SIZEOF_INT);
|
||||
// find the seqID of the cell, skip three integers for chunkID, offset, and length
|
||||
// the seqID is plain written as part of the cell representation
|
||||
long cellSeqID = ByteBufferUtils.toLong(block, offsetInBytes + 3*Bytes.SIZEOF_INT);
|
||||
|
||||
ByteBuffer buf = chunk.getData(); // get the ByteBuffer where the cell data is stored
|
||||
if (buf == null) {
|
||||
// this should not happen, putting an assertion here at least for the testing period
|
||||
assert false;
|
||||
}
|
||||
|
||||
return new ByteBufferChunkCell(buf, offsetOfCell, lengthOfCell, cellSeqID);
|
||||
}
|
||||
}
|
|
@ -126,6 +126,9 @@ public class CellSet implements NavigableSet<Cell> {
|
|||
throw new UnsupportedOperationException("Not implemented");
|
||||
}
|
||||
|
||||
// TODO: why do we have a double traversing through map? Recall we have Cell to Cell mapping...
|
||||
// First for first/last key, which actually returns Cell and then get for the same Cell?
|
||||
// TODO: Consider just returning first/lastKey(), which should be twice as efficient...
|
||||
public Cell first() {
|
||||
return this.delegatee.get(this.delegatee.firstKey());
|
||||
}
|
||||
|
|
|
@ -18,6 +18,10 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
import java.util.NavigableMap;
|
||||
import java.util.NavigableSet;
|
||||
|
@ -28,16 +32,30 @@ import org.apache.hadoop.hbase.Cell;
|
|||
import org.apache.hadoop.hbase.CellComparator;
|
||||
import org.apache.hadoop.hbase.CellUtil;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||
|
||||
import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
|
||||
|
||||
|
||||
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
|
||||
import org.apache.hadoop.hbase.testclassification.SmallTests;
|
||||
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
@Category({RegionServerTests.class, SmallTests.class})
|
||||
@RunWith(Parameterized.class)
|
||||
public class TestCellFlatSet extends TestCase {
|
||||
@Parameterized.Parameters
|
||||
public static Object[] data() {
|
||||
return new Object[] { "SMALL_CHUNKS", "NORMAL_CHUNKS" }; // test with different chunk sizes
|
||||
}
|
||||
private static final int NUM_OF_CELLS = 4;
|
||||
private Cell ascCells[];
|
||||
private CellArrayMap ascCbOnHeap;
|
||||
|
@ -47,8 +65,33 @@ public class TestCellFlatSet extends TestCase {
|
|||
private KeyValue lowerOuterCell;
|
||||
private KeyValue upperOuterCell;
|
||||
|
||||
|
||||
private CellChunkMap ascCCM; // for testing ascending CellChunkMap with one chunk in array
|
||||
private CellChunkMap descCCM; // for testing descending CellChunkMap with one chunk in array
|
||||
private CellChunkMap ascMultCCM; // testing ascending CellChunkMap with multiple chunks in array
|
||||
private CellChunkMap descMultCCM;// testing descending CellChunkMap with multiple chunks in array
|
||||
private static ChunkCreator chunkCreator;
|
||||
|
||||
|
||||
public TestCellFlatSet(String chunkType){
|
||||
long globalMemStoreLimit = (long) (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage()
|
||||
.getMax() * MemorySizeUtil.getGlobalMemStoreHeapPercent(CONF, false));
|
||||
if (chunkType == "NORMAL_CHUNKS") {
|
||||
chunkCreator = ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false,
|
||||
globalMemStoreLimit, 0.2f, MemStoreLAB.POOL_INITIAL_SIZE_DEFAULT, null);
|
||||
assertTrue(chunkCreator != null);
|
||||
} else {
|
||||
// chunkCreator with smaller chunk size, so only 3 cell-representations can accommodate a chunk
|
||||
chunkCreator = ChunkCreator.initialize(64, false,
|
||||
globalMemStoreLimit, 0.2f, MemStoreLAB.POOL_INITIAL_SIZE_DEFAULT, null);
|
||||
assertTrue(chunkCreator != null);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Before
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
// create array of Cells to pass to the CellFlatMap under CellSet
|
||||
|
@ -71,17 +114,45 @@ public class TestCellFlatSet extends TestCase {
|
|||
ascCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,ascCells,0,NUM_OF_CELLS,false);
|
||||
descCells = new Cell[] {kv4,kv3,kv2,kv1};
|
||||
descCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,descCells,0,NUM_OF_CELLS,true);
|
||||
|
||||
CONF.setBoolean(MemStoreLAB.USEMSLAB_KEY, true);
|
||||
CONF.setFloat(MemStoreLAB.CHUNK_POOL_MAXSIZE_KEY, 0.2f);
|
||||
ChunkCreator.chunkPoolDisabled = false;
|
||||
|
||||
// create ascending and descending CellChunkMaps
|
||||
// according to parameter, once built with normal chunks and at second with small chunks
|
||||
ascCCM = setUpCellChunkMap(true);
|
||||
descCCM = setUpCellChunkMap(false);
|
||||
|
||||
|
||||
// ascMultCCM = setUpCellChunkMap(true);
|
||||
// descMultCCM = setUpCellChunkMap(false);
|
||||
}
|
||||
|
||||
/* Create and test CellSet based on CellArrayMap */
|
||||
public void testCellBlocksOnHeap() throws Exception {
|
||||
/* Create and test ascending CellSet based on CellArrayMap */
|
||||
@Test
|
||||
public void testCellArrayMapAsc() throws Exception {
|
||||
CellSet cs = new CellSet(ascCbOnHeap);
|
||||
testCellBlocks(cs);
|
||||
testIterators(cs);
|
||||
}
|
||||
|
||||
/* Create and test ascending and descending CellSet based on CellChunkMap */
|
||||
@Test
|
||||
public void testCellChunkMap() throws Exception {
|
||||
CellSet cs = new CellSet(ascCCM);
|
||||
testCellBlocks(cs);
|
||||
testIterators(cs);
|
||||
testSubSet(cs);
|
||||
cs = new CellSet(descCCM);
|
||||
testSubSet(cs);
|
||||
// cs = new CellSet(ascMultCCM);
|
||||
// testCellBlocks(cs);
|
||||
// testSubSet(cs);
|
||||
// cs = new CellSet(descMultCCM);
|
||||
// testSubSet(cs);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAsc() throws Exception {
|
||||
CellSet ascCs = new CellSet(ascCbOnHeap);
|
||||
|
@ -148,7 +219,7 @@ public class TestCellFlatSet extends TestCase {
|
|||
assertEquals(NUM_OF_CELLS, cs.size()); // check size
|
||||
assertFalse(cs.contains(outerCell)); // check outer cell
|
||||
|
||||
assertTrue(cs.contains(ascCells[0])); // check existence of the first
|
||||
assertTrue(cs.contains(ascCells[0])); // check existence of the first
|
||||
Cell first = cs.first();
|
||||
assertTrue(ascCells[0].equals(first));
|
||||
|
||||
|
@ -200,4 +271,51 @@ public class TestCellFlatSet extends TestCase {
|
|||
}
|
||||
assertEquals(NUM_OF_CELLS, count);
|
||||
}
|
||||
|
||||
/* Create CellChunkMap with four cells inside the index chunk */
|
||||
private CellChunkMap setUpCellChunkMap(boolean asc) {
|
||||
|
||||
// allocate new chunks and use the data chunk to hold the full data of the cells
|
||||
// and the index chunk to hold the cell-representations
|
||||
Chunk dataChunk = chunkCreator.getChunk();
|
||||
Chunk idxChunk = chunkCreator.getChunk();
|
||||
// the array of index chunks to be used as a basis for CellChunkMap
|
||||
Chunk chunkArray[] = new Chunk[8]; // according to test currently written 8 is way enough
|
||||
int chunkArrayIdx = 0;
|
||||
chunkArray[chunkArrayIdx++] = idxChunk;
|
||||
|
||||
ByteBuffer idxBuffer = idxChunk.getData(); // the buffers of the chunks
|
||||
ByteBuffer dataBuffer = dataChunk.getData();
|
||||
int dataOffset = Bytes.SIZEOF_INT; // offset inside data buffer
|
||||
int idxOffset = Bytes.SIZEOF_INT; // skip the space for chunk ID
|
||||
|
||||
Cell[] cellArray = asc ? ascCells : descCells;
|
||||
|
||||
for (Cell kv: cellArray) {
|
||||
// do we have enough space to write the cell data on the data chunk?
|
||||
if (dataOffset + KeyValueUtil.length(kv) > chunkCreator.getChunkSize()) {
|
||||
dataChunk = chunkCreator.getChunk(); // allocate more data chunks if needed
|
||||
dataBuffer = dataChunk.getData();
|
||||
dataOffset = Bytes.SIZEOF_INT;
|
||||
}
|
||||
int dataStartOfset = dataOffset;
|
||||
dataOffset = KeyValueUtil.appendTo(kv, dataBuffer, dataOffset, false); // write deep cell data
|
||||
|
||||
// do we have enough space to write the cell-representation on the index chunk?
|
||||
if (idxOffset + CellChunkMap.SIZEOF_CELL_REP > chunkCreator.getChunkSize()) {
|
||||
idxChunk = chunkCreator.getChunk(); // allocate more index chunks if needed
|
||||
idxBuffer = idxChunk.getData();
|
||||
idxOffset = Bytes.SIZEOF_INT;
|
||||
chunkArray[chunkArrayIdx++] = idxChunk;
|
||||
}
|
||||
idxOffset = ByteBufferUtils.putInt(idxBuffer, idxOffset, dataChunk.getId()); // write data chunk id
|
||||
idxOffset = ByteBufferUtils.putInt(idxBuffer, idxOffset, dataStartOfset); // offset
|
||||
idxOffset = ByteBufferUtils.putInt(idxBuffer, idxOffset, KeyValueUtil.length(kv)); // length
|
||||
idxOffset = ByteBufferUtils.putLong(idxBuffer, idxOffset, kv.getSequenceId()); // seqId
|
||||
}
|
||||
|
||||
return asc ?
|
||||
new CellChunkMap(CellComparator.COMPARATOR,chunkArray,0,NUM_OF_CELLS,false) :
|
||||
new CellChunkMap(CellComparator.COMPARATOR,chunkArray,0,NUM_OF_CELLS,true);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue