HBASE-16205 When Cells are not copied to MSLAB, deep clone it while adding to Memstore.
This commit is contained in:
parent
6dbce2a8cb
commit
2df0ef549a
|
@ -0,0 +1,39 @@
|
||||||
|
/**
|
||||||
|
* Copyright The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A cell implementing this interface would mean that the memory area backing this cell will refer
|
||||||
|
* to a memory area that could be part of a larger common memory area used by the RegionServer. This
|
||||||
|
* might be the bigger memory chunk where the RPC requests are read into. If an exclusive instance
|
||||||
|
* is required, use the {@link #cloneToCell()} to have the contents of the cell copied to an
|
||||||
|
* exclusive memory area.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public interface ShareableMemory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does a deep copy of the contents to a new memory area and returns it in the form of a cell.
|
||||||
|
* @return The deep cloned cell
|
||||||
|
*/
|
||||||
|
Cell cloneToCell();
|
||||||
|
}
|
|
@ -24,10 +24,13 @@ import java.nio.ByteBuffer;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.Cell;
|
import org.apache.hadoop.hbase.Cell;
|
||||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||||
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||||
import org.apache.hadoop.hbase.NoTagsKeyValue;
|
import org.apache.hadoop.hbase.NoTagsKeyValue;
|
||||||
|
import org.apache.hadoop.hbase.ShareableMemory;
|
||||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Codec that does KeyValue version 1 serialization.
|
* Codec that does KeyValue version 1 serialization.
|
||||||
|
@ -99,7 +102,35 @@ public class KeyValueCodec implements Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Cell createCell(byte[] buf, int offset, int len) {
|
protected Cell createCell(byte[] buf, int offset, int len) {
|
||||||
return new NoTagsKeyValue(buf, offset, len);
|
return new ShareableMemoryNoTagsKeyValue(buf, offset, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static class ShareableMemoryKeyValue extends KeyValue implements ShareableMemory {
|
||||||
|
public ShareableMemoryKeyValue(byte[] bytes, int offset, int length) {
|
||||||
|
super(bytes, offset, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Cell cloneToCell() {
|
||||||
|
byte[] copy = Bytes.copy(this.bytes, this.offset, this.length);
|
||||||
|
KeyValue kv = new KeyValue(copy, 0, copy.length);
|
||||||
|
kv.setSequenceId(this.getSequenceId());
|
||||||
|
return kv;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class ShareableMemoryNoTagsKeyValue extends NoTagsKeyValue implements ShareableMemory {
|
||||||
|
public ShareableMemoryNoTagsKeyValue(byte[] bytes, int offset, int length) {
|
||||||
|
super(bytes, offset, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Cell cloneToCell() {
|
||||||
|
byte[] copy = Bytes.copy(this.bytes, this.offset, this.length);
|
||||||
|
KeyValue kv = new NoTagsKeyValue(copy, 0, copy.length);
|
||||||
|
kv.setSequenceId(this.getSequenceId());
|
||||||
|
return kv;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,6 @@ import java.nio.ByteBuffer;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.Cell;
|
import org.apache.hadoop.hbase.Cell;
|
||||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||||
import org.apache.hadoop.hbase.KeyValue;
|
|
||||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
|
|
||||||
|
@ -85,7 +84,7 @@ public class KeyValueCodecWithTags implements Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Cell createCell(byte[] buf, int offset, int len) {
|
protected Cell createCell(byte[] buf, int offset, int len) {
|
||||||
return new KeyValue(buf, offset, len);
|
return new ShareableMemoryKeyValue(buf, offset, len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.CellComparator;
|
||||||
import org.apache.hadoop.hbase.CellUtil;
|
import org.apache.hadoop.hbase.CellUtil;
|
||||||
import org.apache.hadoop.hbase.KeyValue;
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||||
|
import org.apache.hadoop.hbase.ShareableMemory;
|
||||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.ClassSize;
|
import org.apache.hadoop.hbase.util.ClassSize;
|
||||||
|
@ -110,9 +111,31 @@ public abstract class AbstractMemStore implements MemStore {
|
||||||
public long add(Cell cell) {
|
public long add(Cell cell) {
|
||||||
Cell toAdd = maybeCloneWithAllocator(cell);
|
Cell toAdd = maybeCloneWithAllocator(cell);
|
||||||
boolean mslabUsed = (toAdd != cell);
|
boolean mslabUsed = (toAdd != cell);
|
||||||
|
// This cell data is backed by the same byte[] where we read request in RPC(See HBASE-15180). By
|
||||||
|
// default MSLAB is ON and we might have copied cell to MSLAB area. If not we must do below deep
|
||||||
|
// copy. Or else we will keep referring to the bigger chunk of memory and prevent it from
|
||||||
|
// getting GCed.
|
||||||
|
// Copy to MSLAB would not have happened if
|
||||||
|
// 1. MSLAB is turned OFF. See "hbase.hregion.memstore.mslab.enabled"
|
||||||
|
// 2. When the size of the cell is bigger than the max size supported by MSLAB. See
|
||||||
|
// "hbase.hregion.memstore.mslab.max.allocation". This defaults to 256 KB
|
||||||
|
// 3. When cells are from Append/Increment operation.
|
||||||
|
if (!mslabUsed) {
|
||||||
|
toAdd = deepCopyIfNeeded(toAdd);
|
||||||
|
}
|
||||||
return internalAdd(toAdd, mslabUsed);
|
return internalAdd(toAdd, mslabUsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Cell deepCopyIfNeeded(Cell cell) {
|
||||||
|
// When Cell is backed by a shared memory chunk (this can be a chunk of memory where we read the
|
||||||
|
// req into) the Cell instance will be of type ShareableMemory. Later we will add feature to
|
||||||
|
// read the RPC request into pooled direct ByteBuffers.
|
||||||
|
if (cell instanceof ShareableMemory) {
|
||||||
|
return ((ShareableMemory) cell).cloneToCell();
|
||||||
|
}
|
||||||
|
return cell;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update or insert the specified Cells.
|
* Update or insert the specified Cells.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -156,10 +179,8 @@ public abstract class AbstractMemStore implements MemStore {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public long delete(Cell deleteCell) {
|
public long delete(Cell deleteCell) {
|
||||||
Cell toAdd = maybeCloneWithAllocator(deleteCell);
|
// Delete operation just adds the delete marker cell coming here.
|
||||||
boolean mslabUsed = (toAdd != deleteCell);
|
return add(deleteCell);
|
||||||
long s = internalAdd(toAdd, mslabUsed);
|
|
||||||
return s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -245,6 +266,10 @@ public abstract class AbstractMemStore implements MemStore {
|
||||||
// hitting OOME - see TestMemStore.testUpsertMSLAB for a
|
// hitting OOME - see TestMemStore.testUpsertMSLAB for a
|
||||||
// test that triggers the pathological case if we don't avoid MSLAB
|
// test that triggers the pathological case if we don't avoid MSLAB
|
||||||
// here.
|
// here.
|
||||||
|
// This cell data is backed by the same byte[] where we read request in RPC(See HBASE-15180). We
|
||||||
|
// must do below deep copy. Or else we will keep referring to the bigger chunk of memory and
|
||||||
|
// prevent it from getting GCed.
|
||||||
|
cell = deepCopyIfNeeded(cell);
|
||||||
long addedSize = internalAdd(cell, false);
|
long addedSize = internalAdd(cell, false);
|
||||||
|
|
||||||
// Get the Cells for the row/family/qualifier regardless of timestamp.
|
// Get the Cells for the row/family/qualifier regardless of timestamp.
|
||||||
|
|
Loading…
Reference in New Issue