HBASE-16205 When Cells are not copied to MSLAB, deep clone it while adding to Memstore.
This commit is contained in:
parent
6dbce2a8cb
commit
2df0ef549a
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Copyright The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase;
|
||||
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* A cell implementing this interface would mean that the memory area backing this cell will refer
|
||||
* to a memory area that could be part of a larger common memory area used by the RegionServer. This
|
||||
* might be the bigger memory chunk where the RPC requests are read into. If an exclusive instance
|
||||
* is required, use the {@link #cloneToCell()} to have the contents of the cell copied to an
|
||||
* exclusive memory area.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public interface ShareableMemory {
|
||||
|
||||
/**
|
||||
* Does a deep copy of the contents to a new memory area and returns it in the form of a cell.
|
||||
* @return The deep cloned cell
|
||||
*/
|
||||
Cell cloneToCell();
|
||||
}
|
|
@ -24,10 +24,13 @@ import java.nio.ByteBuffer;
|
|||
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||
import org.apache.hadoop.hbase.NoTagsKeyValue;
|
||||
import org.apache.hadoop.hbase.ShareableMemory;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* Codec that does KeyValue version 1 serialization.
|
||||
|
@ -99,7 +102,35 @@ public class KeyValueCodec implements Codec {
|
|||
}
|
||||
|
||||
protected Cell createCell(byte[] buf, int offset, int len) {
|
||||
return new NoTagsKeyValue(buf, offset, len);
|
||||
return new ShareableMemoryNoTagsKeyValue(buf, offset, len);
|
||||
}
|
||||
|
||||
static class ShareableMemoryKeyValue extends KeyValue implements ShareableMemory {
|
||||
public ShareableMemoryKeyValue(byte[] bytes, int offset, int length) {
|
||||
super(bytes, offset, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Cell cloneToCell() {
|
||||
byte[] copy = Bytes.copy(this.bytes, this.offset, this.length);
|
||||
KeyValue kv = new KeyValue(copy, 0, copy.length);
|
||||
kv.setSequenceId(this.getSequenceId());
|
||||
return kv;
|
||||
}
|
||||
}
|
||||
|
||||
static class ShareableMemoryNoTagsKeyValue extends NoTagsKeyValue implements ShareableMemory {
|
||||
public ShareableMemoryNoTagsKeyValue(byte[] bytes, int offset, int length) {
|
||||
super(bytes, offset, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Cell cloneToCell() {
|
||||
byte[] copy = Bytes.copy(this.bytes, this.offset, this.length);
|
||||
KeyValue kv = new NoTagsKeyValue(copy, 0, copy.length);
|
||||
kv.setSequenceId(this.getSequenceId());
|
||||
return kv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.nio.ByteBuffer;
|
|||
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
|
||||
|
@ -85,7 +84,7 @@ public class KeyValueCodecWithTags implements Codec {
|
|||
}
|
||||
|
||||
protected Cell createCell(byte[] buf, int offset, int len) {
|
||||
return new KeyValue(buf, offset, len);
|
||||
return new ShareableMemoryKeyValue(buf, offset, len);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.CellComparator;
|
|||
import org.apache.hadoop.hbase.CellUtil;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueUtil;
|
||||
import org.apache.hadoop.hbase.ShareableMemory;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.ClassSize;
|
||||
|
@ -110,9 +111,31 @@ public abstract class AbstractMemStore implements MemStore {
|
|||
public long add(Cell cell) {
|
||||
Cell toAdd = maybeCloneWithAllocator(cell);
|
||||
boolean mslabUsed = (toAdd != cell);
|
||||
// This cell data is backed by the same byte[] where we read request in RPC(See HBASE-15180). By
|
||||
// default MSLAB is ON and we might have copied cell to MSLAB area. If not we must do below deep
|
||||
// copy. Or else we will keep referring to the bigger chunk of memory and prevent it from
|
||||
// getting GCed.
|
||||
// Copy to MSLAB would not have happened if
|
||||
// 1. MSLAB is turned OFF. See "hbase.hregion.memstore.mslab.enabled"
|
||||
// 2. When the size of the cell is bigger than the max size supported by MSLAB. See
|
||||
// "hbase.hregion.memstore.mslab.max.allocation". This defaults to 256 KB
|
||||
// 3. When cells are from Append/Increment operation.
|
||||
if (!mslabUsed) {
|
||||
toAdd = deepCopyIfNeeded(toAdd);
|
||||
}
|
||||
return internalAdd(toAdd, mslabUsed);
|
||||
}
|
||||
|
||||
private static Cell deepCopyIfNeeded(Cell cell) {
|
||||
// When Cell is backed by a shared memory chunk (this can be a chunk of memory where we read the
|
||||
// req into) the Cell instance will be of type ShareableMemory. Later we will add feature to
|
||||
// read the RPC request into pooled direct ByteBuffers.
|
||||
if (cell instanceof ShareableMemory) {
|
||||
return ((ShareableMemory) cell).cloneToCell();
|
||||
}
|
||||
return cell;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update or insert the specified Cells.
|
||||
* <p>
|
||||
|
@ -156,10 +179,8 @@ public abstract class AbstractMemStore implements MemStore {
|
|||
*/
|
||||
@Override
|
||||
public long delete(Cell deleteCell) {
|
||||
Cell toAdd = maybeCloneWithAllocator(deleteCell);
|
||||
boolean mslabUsed = (toAdd != deleteCell);
|
||||
long s = internalAdd(toAdd, mslabUsed);
|
||||
return s;
|
||||
// Delete operation just adds the delete marker cell coming here.
|
||||
return add(deleteCell);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -245,6 +266,10 @@ public abstract class AbstractMemStore implements MemStore {
|
|||
// hitting OOME - see TestMemStore.testUpsertMSLAB for a
|
||||
// test that triggers the pathological case if we don't avoid MSLAB
|
||||
// here.
|
||||
// This cell data is backed by the same byte[] where we read request in RPC(See HBASE-15180). We
|
||||
// must do below deep copy. Or else we will keep referring to the bigger chunk of memory and
|
||||
// prevent it from getting GCed.
|
||||
cell = deepCopyIfNeeded(cell);
|
||||
long addedSize = internalAdd(cell, false);
|
||||
|
||||
// Get the Cells for the row/family/qualifier regardless of timestamp.
|
||||
|
|
Loading…
Reference in New Issue