HBASE-16698 Performance issue: handlers stuck waiting for CountDownLatch inside WALKey#getWriteEntry under high writing workload
This commit is contained in:
parent
c9c67d1a94
commit
9b13514483
|
@ -64,6 +64,7 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
import java.util.concurrent.atomic.LongAdder;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -197,6 +198,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
|||
public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY =
|
||||
"hbase.hregion.scan.loadColumnFamiliesOnDemand";
|
||||
|
||||
/** Config key for using mvcc pre-assign feature for put */
|
||||
public static final String HREGION_MVCC_PRE_ASSIGN = "hbase.hregion.mvcc.preassign";
|
||||
public static final boolean DEFAULT_HREGION_MVCC_PRE_ASSIGN = true;
|
||||
|
||||
/**
|
||||
* This is the global default value for durability. All tables/mutations not
|
||||
* defining a durability or using USE_DEFAULT will default to this value.
|
||||
|
@ -585,6 +590,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
|||
// that has non-default scope
|
||||
private final NavigableMap<byte[], Integer> replicationScope = new TreeMap<byte[], Integer>(
|
||||
Bytes.BYTES_COMPARATOR);
|
||||
// flag and lock for MVCC preassign
|
||||
private final boolean mvccPreAssign;
|
||||
private final ReentrantLock preAssignMvccLock;
|
||||
|
||||
/**
|
||||
* HRegion constructor. This constructor should only be used for testing and
|
||||
|
@ -744,6 +752,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
|||
false :
|
||||
conf.getBoolean(HConstants.ENABLE_CLIENT_BACKPRESSURE,
|
||||
HConstants.DEFAULT_ENABLE_CLIENT_BACKPRESSURE);
|
||||
|
||||
// get mvcc pre-assign flag and lock
|
||||
this.mvccPreAssign = conf.getBoolean(HREGION_MVCC_PRE_ASSIGN, DEFAULT_HREGION_MVCC_PRE_ASSIGN);
|
||||
if (this.mvccPreAssign) {
|
||||
this.preAssignMvccLock = new ReentrantLock();
|
||||
} else {
|
||||
this.preAssignMvccLock = null;
|
||||
}
|
||||
}
|
||||
|
||||
void setHTableSpecificConf() {
|
||||
|
@ -3215,36 +3231,61 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
|||
// STEP 4. Append the final edit to WAL and sync.
|
||||
Mutation mutation = batchOp.getMutation(firstIndex);
|
||||
WALKey walKey = null;
|
||||
long txid;
|
||||
if (replay) {
|
||||
// use wal key from the original
|
||||
walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(),
|
||||
this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now,
|
||||
mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc);
|
||||
walKey.setOrigLogSeqNum(batchOp.getReplaySequenceId());
|
||||
}
|
||||
// Not sure what is going on here when replay is going on... does the below append get
|
||||
// called for replayed edits? Am afraid to change it without test.
|
||||
if (!walEdit.isEmpty()) {
|
||||
if (!replay) {
|
||||
txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
|
||||
if (txid != 0) {
|
||||
sync(txid, durability);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
if (!walEdit.isEmpty()) {
|
||||
try {
|
||||
if (this.mvccPreAssign) {
|
||||
preAssignMvccLock.lock();
|
||||
writeEntry = mvcc.begin();
|
||||
}
|
||||
// we use HLogKey here instead of WALKey directly to support legacy coprocessors.
|
||||
walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(),
|
||||
this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now,
|
||||
mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc,
|
||||
this.getReplicationScope());
|
||||
if (this.mvccPreAssign) {
|
||||
walKey.setPreAssignedWriteEntry(writeEntry);
|
||||
}
|
||||
// TODO: Use the doAppend methods below... complicated by the replay stuff above.
|
||||
try {
|
||||
long txid = this.wal.append(this.getRegionInfo(), walKey,
|
||||
walEdit, true);
|
||||
if (txid != 0) sync(txid, durability);
|
||||
txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
|
||||
} finally {
|
||||
if (mvccPreAssign) {
|
||||
preAssignMvccLock.unlock();
|
||||
}
|
||||
}
|
||||
if (txid != 0) {
|
||||
sync(txid, durability);
|
||||
}
|
||||
if (writeEntry == null) {
|
||||
// if MVCC not preassigned, wait here until assigned
|
||||
writeEntry = walKey.getWriteEntry();
|
||||
}
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
if (walKey != null) mvcc.complete(walKey.getWriteEntry());
|
||||
if (walKey != null && writeEntry == null) {
|
||||
// the writeEntry is not preassigned and error occurred during append or sync
|
||||
mvcc.complete(walKey.getWriteEntry());
|
||||
}
|
||||
throw ioe;
|
||||
}
|
||||
}
|
||||
if (walKey == null) {
|
||||
// If no walKey, then skipping WAL or some such. Being an mvcc transaction so sequenceid.
|
||||
// If no walKey, then not in replay and skipping WAL or some such. Begin an MVCC transaction
|
||||
// to get sequence id.
|
||||
writeEntry = mvcc.begin();
|
||||
}
|
||||
|
||||
|
@ -3267,7 +3308,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
|||
// STEP 6. Complete mvcc.
|
||||
if (replay) {
|
||||
this.mvcc.advanceTo(batchOp.getReplaySequenceId());
|
||||
} else if (writeEntry != null/*Can be null if in replay mode*/) {
|
||||
} else {
|
||||
// writeEntry won't be empty if not in replay mode
|
||||
assert writeEntry != null;
|
||||
mvcc.completeAndWait(writeEntry);
|
||||
writeEntry = null;
|
||||
}
|
||||
|
@ -7592,9 +7635,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
|||
public static final long FIXED_OVERHEAD = ClassSize.align(
|
||||
ClassSize.OBJECT +
|
||||
ClassSize.ARRAY +
|
||||
49 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT +
|
||||
50 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT +
|
||||
(14 * Bytes.SIZEOF_LONG) +
|
||||
5 * Bytes.SIZEOF_BOOLEAN);
|
||||
6 * Bytes.SIZEOF_BOOLEAN);
|
||||
|
||||
// woefully out of date - currently missing:
|
||||
// 1 x HashMap - coprocessorServiceHandlers
|
||||
|
|
|
@ -112,11 +112,16 @@ class FSWALEntry extends Entry {
|
|||
}
|
||||
stamped = true;
|
||||
long regionSequenceId = WALKey.NO_SEQUENCE_ID;
|
||||
MultiVersionConcurrencyControl mvcc = getKey().getMvcc();
|
||||
MultiVersionConcurrencyControl.WriteEntry we = null;
|
||||
|
||||
WALKey key = getKey();
|
||||
MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry();
|
||||
boolean preAssigned = (we != null);
|
||||
if (!preAssigned) {
|
||||
MultiVersionConcurrencyControl mvcc = key.getMvcc();
|
||||
if (mvcc != null) {
|
||||
we = mvcc.begin();
|
||||
}
|
||||
}
|
||||
if (we != null) {
|
||||
regionSequenceId = we.getWriteNumber();
|
||||
}
|
||||
|
||||
|
@ -125,7 +130,9 @@ class FSWALEntry extends Entry {
|
|||
CellUtil.setSequenceId(c, regionSequenceId);
|
||||
}
|
||||
}
|
||||
getKey().setWriteEntry(we);
|
||||
if (!preAssigned) {
|
||||
key.setWriteEntry(we);
|
||||
}
|
||||
return regionSequenceId;
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
|
|||
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FamilyScope;
|
||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ScopeType;
|
||||
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
|
||||
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry;
|
||||
import org.apache.hadoop.hbase.regionserver.SequenceId;
|
||||
// imports for things that haven't moved from regionserver.wal yet.
|
||||
import org.apache.hadoop.hbase.regionserver.wal.CompressionContext;
|
||||
|
@ -92,6 +93,10 @@ public class WALKey implements SequenceId, Comparable<WALKey> {
|
|||
*/
|
||||
@InterfaceAudience.Private // For internal use only.
|
||||
public MultiVersionConcurrencyControl.WriteEntry getWriteEntry() throws InterruptedIOException {
|
||||
if (this.preAssignedWriteEntry != null) {
|
||||
// don't wait for seqNumAssignedLatch if writeEntry is preassigned
|
||||
return this.preAssignedWriteEntry;
|
||||
}
|
||||
try {
|
||||
this.sequenceIdAssignedLatch.await();
|
||||
} catch (InterruptedException ie) {
|
||||
|
@ -203,6 +208,7 @@ public class WALKey implements SequenceId, Comparable<WALKey> {
|
|||
* Set in a way visible to multiple threads; e.g. synchronized getter/setters.
|
||||
*/
|
||||
private MultiVersionConcurrencyControl.WriteEntry writeEntry;
|
||||
private MultiVersionConcurrencyControl.WriteEntry preAssignedWriteEntry = null;
|
||||
public static final List<UUID> EMPTY_UUIDS = Collections.unmodifiableList(new ArrayList<UUID>());
|
||||
|
||||
// visible for deprecated HLogKey
|
||||
|
@ -731,4 +737,24 @@ public class WALKey implements SequenceId, Comparable<WALKey> {
|
|||
this.origLogSeqNum = walKey.getOrigSequenceNumber();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The preassigned writeEntry, if any
|
||||
*/
|
||||
@InterfaceAudience.Private // For internal use only.
|
||||
public MultiVersionConcurrencyControl.WriteEntry getPreAssignedWriteEntry() {
|
||||
return this.preAssignedWriteEntry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Preassign writeEntry
|
||||
* @param writeEntry the entry to assign
|
||||
*/
|
||||
@InterfaceAudience.Private // For internal use only.
|
||||
public void setPreAssignedWriteEntry(WriteEntry writeEntry) {
|
||||
if (writeEntry != null) {
|
||||
this.preAssignedWriteEntry = writeEntry;
|
||||
this.sequenceId = writeEntry.getWriteNumber();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -6290,8 +6290,11 @@ public class TestHRegion {
|
|||
@Override
|
||||
public Long answer(InvocationOnMock invocation) throws Throwable {
|
||||
WALKey key = invocation.getArgumentAt(1, WALKey.class);
|
||||
MultiVersionConcurrencyControl.WriteEntry we = key.getMvcc().begin();
|
||||
MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry();
|
||||
if (we == null) {
|
||||
we = key.getMvcc().begin();
|
||||
key.setWriteEntry(we);
|
||||
}
|
||||
return 1L;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue