HBASE-16205 When Cells are not copied to MSLAB, deep clone it while adding to Memstore.

This commit is contained in:
anoopsjohn 2016-07-25 22:58:32 +05:30
parent 6dbce2a8cb
commit 2df0ef549a
4 changed files with 101 additions and 7 deletions

View File

@ -0,0 +1,39 @@
/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
/**
 * Marks a {@link Cell} whose backing memory may be part of a larger, common memory area used by
 * the RegionServer rather than an area owned exclusively by that cell. One example of such an
 * area is the bigger memory chunk into which the RPC requests are read. When an exclusive
 * instance is required, call {@link #cloneToCell()} to have the contents of the cell copied to
 * an exclusive memory area.
 */
@InterfaceAudience.Private
public interface ShareableMemory {
/**
 * Does a deep copy of the contents of this cell to a new memory area and returns the copy in the
 * form of a cell.
 * @return The deep cloned cell
 */
Cell cloneToCell();
}

View File

@ -24,10 +24,13 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.NoTagsKeyValue; import org.apache.hadoop.hbase.NoTagsKeyValue;
import org.apache.hadoop.hbase.ShareableMemory;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
/** /**
* Codec that does KeyValue version 1 serialization. * Codec that does KeyValue version 1 serialization.
@ -99,7 +102,35 @@ public class KeyValueCodec implements Codec {
} }
protected Cell createCell(byte[] buf, int offset, int len) { protected Cell createCell(byte[] buf, int offset, int len) {
return new NoTagsKeyValue(buf, offset, len); return new ShareableMemoryNoTagsKeyValue(buf, offset, len);
}
/**
 * A {@link KeyValue} flagged as {@link ShareableMemory}: the byte[] handed to the constructor is
 * referenced as-is, and {@link #cloneToCell()} produces a copy that has its own array.
 */
static class ShareableMemoryKeyValue extends KeyValue implements ShareableMemory {

  /**
   * @param bytes array holding the serialized KeyValue
   * @param offset position in {@code bytes} where this KeyValue starts
   * @param length number of bytes occupied by this KeyValue
   */
  public ShareableMemoryKeyValue(byte[] bytes, int offset, int length) {
    super(bytes, offset, length);
  }

  /**
   * Copies only this cell's own byte range into a fresh array and wraps it in a plain
   * {@link KeyValue}, carrying the sequence id over to the copy.
   * @return a KeyValue backed by its own array
   */
  @Override
  public Cell cloneToCell() {
    final byte[] exclusiveBytes = Bytes.copy(this.bytes, this.offset, this.length);
    final KeyValue clone = new KeyValue(exclusiveBytes, 0, exclusiveBytes.length);
    clone.setSequenceId(this.getSequenceId());
    return clone;
  }
}
static class ShareableMemoryNoTagsKeyValue extends NoTagsKeyValue implements ShareableMemory {
public ShareableMemoryNoTagsKeyValue(byte[] bytes, int offset, int length) {
super(bytes, offset, length);
}
@Override
public Cell cloneToCell() {
byte[] copy = Bytes.copy(this.bytes, this.offset, this.length);
KeyValue kv = new NoTagsKeyValue(copy, 0, copy.length);
kv.setSequenceId(this.getSequenceId());
return kv;
}
} }
} }

View File

@ -24,7 +24,6 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
@ -85,7 +84,7 @@ public class KeyValueCodecWithTags implements Codec {
} }
protected Cell createCell(byte[] buf, int offset, int len) { protected Cell createCell(byte[] buf, int offset, int len) {
return new KeyValue(buf, offset, len); return new ShareableMemoryKeyValue(buf, offset, len);
} }
} }

View File

@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.ShareableMemory;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.hbase.util.ClassSize;
@ -110,9 +111,31 @@ public abstract class AbstractMemStore implements MemStore {
public long add(Cell cell) { public long add(Cell cell) {
Cell toAdd = maybeCloneWithAllocator(cell); Cell toAdd = maybeCloneWithAllocator(cell);
boolean mslabUsed = (toAdd != cell); boolean mslabUsed = (toAdd != cell);
// This cell's data is backed by the same byte[] into which the RPC request was read (see
// HBASE-15180). MSLAB is ON by default, so the cell may already have been copied into the MSLAB
// area. If it was not, we must make the deep copy below; otherwise we would keep referring to
// the bigger chunk of memory and prevent it from being GCed.
// The copy to MSLAB does not happen when
// 1. MSLAB is turned OFF. See "hbase.hregion.memstore.mslab.enabled"
// 2. The size of the cell is bigger than the max size supported by MSLAB. See
// "hbase.hregion.memstore.mslab.max.allocation". This defaults to 256 KB
// 3. The cells are from an Append/Increment operation.
if (!mslabUsed) {
toAdd = deepCopyIfNeeded(toAdd);
}
return internalAdd(toAdd, mslabUsed); return internalAdd(toAdd, mslabUsed);
} }
/**
 * Hands back a deep clone of the given cell when it is a {@link ShareableMemory}; any other cell
 * is returned untouched.
 * @param cell the cell about to be added
 * @return the result of {@link ShareableMemory#cloneToCell()} for a ShareableMemory cell,
 *         otherwise the same {@code cell} instance
 */
private static Cell deepCopyIfNeeded(Cell cell) {
  // A Cell backed by a shared memory chunk (for example the chunk the request was read into) is
  // an instance of ShareableMemory. Reading RPC requests into pooled direct ByteBuffers is
  // planned as a later feature.
  return (cell instanceof ShareableMemory) ? ((ShareableMemory) cell).cloneToCell() : cell;
}
/** /**
* Update or insert the specified Cells. * Update or insert the specified Cells.
* <p> * <p>
@ -156,10 +179,8 @@ public abstract class AbstractMemStore implements MemStore {
*/ */
@Override @Override
public long delete(Cell deleteCell) { public long delete(Cell deleteCell) {
Cell toAdd = maybeCloneWithAllocator(deleteCell); // Delete operation just adds the delete marker cell coming here.
boolean mslabUsed = (toAdd != deleteCell); return add(deleteCell);
long s = internalAdd(toAdd, mslabUsed);
return s;
} }
/** /**
@ -245,6 +266,10 @@ public abstract class AbstractMemStore implements MemStore {
// hitting OOME - see TestMemStore.testUpsertMSLAB for a // hitting OOME - see TestMemStore.testUpsertMSLAB for a
// test that triggers the pathological case if we don't avoid MSLAB // test that triggers the pathological case if we don't avoid MSLAB
// here. // here.
// This cell's data is backed by the same byte[] into which the RPC request was read (see
// HBASE-15180). We must make the deep copy below; otherwise we would keep referring to the
// bigger chunk of memory and prevent it from being GCed.
cell = deepCopyIfNeeded(cell);
long addedSize = internalAdd(cell, false); long addedSize = internalAdd(cell, false);
// Get the Cells for the row/family/qualifier regardless of timestamp. // Get the Cells for the row/family/qualifier regardless of timestamp.