HBASE-16436 Adding CellChunkMap code, its tests and fixes to all code review comments

This commit is contained in:
anastas 2017-05-14 16:04:36 +03:00
parent 305ffcb040
commit 5cdaca5c00
3 changed files with 253 additions and 5 deletions

View File

@ -0,0 +1,127 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.nio.ByteBuffer;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import java.util.Comparator;
/**
* CellChunkMap is an array of serialized representations of Cell
* (pointing to Chunks with full Cell data) and can be allocated both off-heap and on-heap.
*
* CellChunkMap is a byte array (chunk) holding all that is needed to access a Cell, which
* is actually saved on another deeper chunk.
* Per Cell we have a reference to this deeper byte array B (chunk ID, integer),
* offset in bytes in B (integer), length in bytes in B (integer) and seqID of the cell (long).
* In order to save reference to byte array we use the Chunk's ID given by ChunkCreator.
*
* The CellChunkMap memory layout on chunk A relevant to a deeper byte array B,
* holding the actual cell data:
*
* < header > <--------------- first Cell -----------------> <-- second Cell ...
* --------------------------------------------------------------------------------------- ...
* integer | integer | integer | integer | long |
* 4 bytes | 4 bytes | 4 bytes | 4 bytes | 8 bytes |
* ChunkID | chunkID of | offset in B | length of | sequence | ...
* of this | chunk B with | where Cell's | Cell's | ID of |
* chunk A | Cell data | data starts | data in B | the Cell |
* --------------------------------------------------------------------------------------- ...
*/
@InterfaceAudience.Private
public class CellChunkMap extends CellFlatMap {

  /** The ordered array of index chunks holding the cell-representations. */
  private final Chunk[] chunks;

  /** Constant number of cell-representations that fit into a single index chunk. */
  private final int numOfCellsInsideChunk;

  /**
   * Size in bytes of a single cell-representation: three integers (chunk ID of the chunk with
   * the cell data, offset and length of the data inside that chunk) and one long (seqID).
   */
  public static final int SIZEOF_CELL_REP = 3 * Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG;

  /**
   * C-tor for creating CellChunkMap from existing Chunk array, which must be ordered
   * (decreasingly or increasingly according to parameter "descending")
   * @param comparator a tool for comparing cells
   * @param chunks ordered array of index chunk with cell representations
   * @param min the index of the first cell (usually 0)
   * @param max number of Cells or the index of the cell after the maximal cell
   * @param descending the order of the given array
   */
  public CellChunkMap(Comparator<? super Cell> comparator,
      Chunk[] chunks, int min, int max, boolean descending) {
    super(comparator, min, max, descending);
    this.chunks = chunks;
    // each chunk starts with its own ID (one integer), followed by the cell-representations
    this.numOfCellsInsideChunk =
        (ChunkCreator.getInstance().getChunkSize() - Bytes.SIZEOF_INT) / SIZEOF_CELL_REP;
  }

  /**
   * To be used by the base (CellFlatMap) class only, to create a sub-CellFlatMap.
   * The result is always a CellChunkMap over the same chunk array and comparator.
   * @param min the index of the first cell of the sub-map
   * @param max the index of the cell after the last cell of the sub-map
   * @param descending the order of the sub-map
   * @return a new CellChunkMap restricted to the given range
   */
  @Override
  protected CellFlatMap createSubCellFlatMap(int min, int max, boolean descending) {
    return new CellChunkMap(this.comparator(), this.chunks, min, max, descending);
  }

  /**
   * Materializes the cell whose representation is stored at index {@code i}.
   * @param i the index of the cell-representation, counted over the whole chunk array
   * @return a ByteBufferChunkCell built from the buffer, offset, length and seqID that the
   *         representation refers to
   * @throws IllegalStateException if the referenced data chunk, or its buffer, is missing
   */
  @Override
  protected Cell getCell(int i) {
    // locate the index chunk holding the representation and the slot inside that chunk
    int chunkIndex = i / numOfCellsInsideChunk;
    int slotInChunk = i % numOfCellsInsideChunk;
    ByteBuffer block = chunks[chunkIndex].getData();
    // skip the chunk ID at the head of the index chunk, then the preceding representations
    int offsetInBytes = Bytes.SIZEOF_INT + slotInChunk * SIZEOF_CELL_REP;

    // the ID of the chunk holding the cell data is stored first
    int chunkId = ByteBufferUtils.toInt(block, offsetInBytes);
    Chunk chunk = ChunkCreator.getInstance().getChunk(chunkId);
    if (chunk == null) {
      // Should not happen. Fail explicitly: a bare "assert false" is skipped when assertions
      // are disabled and the problem would only surface later as a NullPointerException.
      throw new IllegalStateException("In CellChunkMap, no chunk is registered under chunk ID "
          + chunkId + "; was looking for the cell at index " + i);
    }

    // the offset of the cell data is stored second, right after the chunk ID
    int offsetOfCell = ByteBufferUtils.toInt(block, offsetInBytes + Bytes.SIZEOF_INT);
    // the length of the cell data is stored third, after the chunk ID and the offset
    int lengthOfCell = ByteBufferUtils.toInt(block, offsetInBytes + 2 * Bytes.SIZEOF_INT);
    // the seqID is written plainly as the last part of the representation
    long cellSeqID = ByteBufferUtils.toLong(block, offsetInBytes + 3 * Bytes.SIZEOF_INT);

    ByteBuffer buf = chunk.getData(); // the ByteBuffer where the cell data is stored
    if (buf == null) {
      // Should not happen; see the note on the missing-chunk check above.
      throw new IllegalStateException("In CellChunkMap, chunk with ID " + chunkId
          + " has no data buffer; was looking for the cell at index " + i);
    }
    return new ByteBufferChunkCell(buf, offsetOfCell, lengthOfCell, cellSeqID);
  }
}

View File

@ -126,6 +126,9 @@ public class CellSet implements NavigableSet<Cell> {
throw new UnsupportedOperationException("Not implemented");
}
// TODO: this traverses the map twice: firstKey() already yields a Cell, and get() then looks
// that very same Cell up again (recall the map is a Cell to Cell mapping).
// TODO: consider returning first/lastKey() directly, which should avoid the second traversal.
public Cell first() {
  final Cell firstKey = this.delegatee.firstKey();
  return this.delegatee.get(firstKey);
}

View File

@ -18,6 +18,10 @@
*/
package org.apache.hadoop.hbase.regionserver;
import java.lang.management.ManagementFactory;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.NavigableMap;
import java.util.NavigableSet;
@ -28,16 +32,30 @@ import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import static org.junit.Assert.assertTrue;
@Category({RegionServerTests.class, SmallTests.class})
@RunWith(Parameterized.class)
public class TestCellFlatSet extends TestCase {
@Parameterized.Parameters
public static Object[] data() {
  // one entry per chunk-size flavour the whole test class is run with
  final Object[] chunkTypes = new Object[2];
  chunkTypes[0] = "SMALL_CHUNKS";
  chunkTypes[1] = "NORMAL_CHUNKS";
  return chunkTypes;
}
private static final int NUM_OF_CELLS = 4;
private Cell ascCells[];
private CellArrayMap ascCbOnHeap;
@ -47,8 +65,33 @@ public class TestCellFlatSet extends TestCase {
private KeyValue lowerOuterCell;
private KeyValue upperOuterCell;
// CellChunkMaps under test; the two "Mult" variants are declared for multi-chunk tests.
private CellChunkMap ascCCM; // for testing ascending CellChunkMap with one chunk in array
private CellChunkMap descCCM; // for testing descending CellChunkMap with one chunk in array
private CellChunkMap ascMultCCM; // testing ascending CellChunkMap with multiple chunks in array
private CellChunkMap descMultCCM;// testing descending CellChunkMap with multiple chunks in array
// chunk allocator used to build the maps; assigned in the constructor according to the
// chunk-type parameter of the current run
private static ChunkCreator chunkCreator;
/**
 * Creates the test instance for one parameterized run and initializes the ChunkCreator
 * with the chunk size matching the given chunk type.
 * @param chunkType "NORMAL_CHUNKS" selects the default MSLAB chunk size; any other value
 *                  selects small, 64-byte chunks
 */
public TestCellFlatSet(String chunkType){
  long globalMemStoreLimit = (long) (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage()
      .getMax() * MemorySizeUtil.getGlobalMemStoreHeapPercent(CONF, false));
  // compare string contents: "==" only tests reference identity and matches here merely
  // because both sides happen to be interned literals
  if ("NORMAL_CHUNKS".equals(chunkType)) {
    chunkCreator = ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false,
        globalMemStoreLimit, 0.2f, MemStoreLAB.POOL_INITIAL_SIZE_DEFAULT, null);
  } else {
    // smaller chunk size, so that a single chunk can hold only 3 cell-representations
    chunkCreator = ChunkCreator.initialize(64, false,
        globalMemStoreLimit, 0.2f, MemStoreLAB.POOL_INITIAL_SIZE_DEFAULT, null);
  }
  assertTrue(chunkCreator != null);
}
@Before
@Override
protected void setUp() throws Exception {
public void setUp() throws Exception {
super.setUp();
// create array of Cells to pass to the CellFlatMap under CellSet
@ -71,17 +114,45 @@ public class TestCellFlatSet extends TestCase {
ascCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,ascCells,0,NUM_OF_CELLS,false);
descCells = new Cell[] {kv4,kv3,kv2,kv1};
descCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,descCells,0,NUM_OF_CELLS,true);
CONF.setBoolean(MemStoreLAB.USEMSLAB_KEY, true);
CONF.setFloat(MemStoreLAB.CHUNK_POOL_MAXSIZE_KEY, 0.2f);
ChunkCreator.chunkPoolDisabled = false;
// create ascending and descending CellChunkMaps
// according to parameter, once built with normal chunks and at second with small chunks
ascCCM = setUpCellChunkMap(true);
descCCM = setUpCellChunkMap(false);
// ascMultCCM = setUpCellChunkMap(true);
// descMultCCM = setUpCellChunkMap(false);
}
/* Create and test CellSet based on CellArrayMap */
public void testCellBlocksOnHeap() throws Exception {
/* Build an ascending CellSet on top of the CellArrayMap and run the common checks on it */
@Test
public void testCellArrayMapAsc() throws Exception {
  final CellSet ascArraySet = new CellSet(ascCbOnHeap);
  testCellBlocks(ascArraySet);
  testIterators(ascArraySet);
}
/* Build ascending and descending CellSets on top of CellChunkMap and run the checks on them */
@Test
public void testCellChunkMap() throws Exception {
  // ascending map: content, iterator and sub-set checks
  final CellSet ascSet = new CellSet(ascCCM);
  testCellBlocks(ascSet);
  testIterators(ascSet);
  testSubSet(ascSet);

  // descending map: sub-set check only
  final CellSet descSet = new CellSet(descCCM);
  testSubSet(descSet);

  // Multi-chunk variants are still disabled:
  //    cs = new CellSet(ascMultCCM);
  //    testCellBlocks(cs);
  //    testSubSet(cs);
  //    cs = new CellSet(descMultCCM);
  //    testSubSet(cs);
}
@Test
public void testAsc() throws Exception {
CellSet ascCs = new CellSet(ascCbOnHeap);
@ -148,7 +219,7 @@ public class TestCellFlatSet extends TestCase {
assertEquals(NUM_OF_CELLS, cs.size()); // check size
assertFalse(cs.contains(outerCell)); // check outer cell
assertTrue(cs.contains(ascCells[0])); // check existence of the first
assertTrue(cs.contains(ascCells[0])); // check existence of the first
Cell first = cs.first();
assertTrue(ascCells[0].equals(first));
@ -200,4 +271,51 @@ public class TestCellFlatSet extends TestCase {
}
assertEquals(NUM_OF_CELLS, count);
}
/* Build a CellChunkMap over the test cells (ascending or descending, per the argument):
 * the full cell data goes to data chunks, one fixed-size cell-representation per cell
 * goes to index chunks, and the index chunks back the returned map. */
private CellChunkMap setUpCellChunkMap(boolean asc) {
  final int chunkSize = chunkCreator.getChunkSize();

  // first data chunk (full cell data) and first index chunk (cell-representations)
  Chunk currentDataChunk = chunkCreator.getChunk();
  Chunk currentIdxChunk = chunkCreator.getChunk();

  // index chunks collected as the basis of the CellChunkMap; 8 is plenty for this test
  Chunk[] idxChunks = new Chunk[8];
  int idxChunkCount = 0;
  idxChunks[idxChunkCount++] = currentIdxChunk;

  ByteBuffer idxBuf = currentIdxChunk.getData();
  ByteBuffer dataBuf = currentDataChunk.getData();
  int dataPos = Bytes.SIZEOF_INT; // write position in the data buffer
  int idxPos = Bytes.SIZEOF_INT;  // write position in the index buffer, past the chunk ID

  final Cell[] cells;
  if (asc) {
    cells = ascCells;
  } else {
    cells = descCells;
  }

  for (int c = 0; c < cells.length; c++) {
    final Cell cell = cells[c];
    final int cellLength = KeyValueUtil.length(cell);

    // move on to a fresh data chunk when the cell data does not fit into the current one
    if (dataPos + cellLength > chunkSize) {
      currentDataChunk = chunkCreator.getChunk();
      dataBuf = currentDataChunk.getData();
      dataPos = Bytes.SIZEOF_INT;
    }
    final int cellStart = dataPos;
    dataPos = KeyValueUtil.appendTo(cell, dataBuf, dataPos, false); // write deep cell data

    // move on to a fresh index chunk when the representation does not fit into the current one
    if (idxPos + CellChunkMap.SIZEOF_CELL_REP > chunkSize) {
      currentIdxChunk = chunkCreator.getChunk();
      idxBuf = currentIdxChunk.getData();
      idxPos = Bytes.SIZEOF_INT;
      idxChunks[idxChunkCount++] = currentIdxChunk;
    }

    // cell-representation: data chunk id, offset, length, seqId
    idxPos = ByteBufferUtils.putInt(idxBuf, idxPos, currentDataChunk.getId());
    idxPos = ByteBufferUtils.putInt(idxBuf, idxPos, cellStart);
    idxPos = ByteBufferUtils.putInt(idxBuf, idxPos, cellLength);
    idxPos = ByteBufferUtils.putLong(idxBuf, idxPos, cell.getSequenceId());
  }

  // an ascending input yields a non-descending map and vice versa
  return new CellChunkMap(CellComparator.COMPARATOR, idxChunks, 0, NUM_OF_CELLS, !asc);
}
}