HBASE-16698 Performance issue: handlers stuck waiting for CountDownLatch inside WALKey#getWriteEntry under high writing workload

This commit is contained in:
Michael Stack 2016-10-13 09:25:23 -07:00
parent c9c67d1a94
commit 9b13514483
4 changed files with 107 additions and 28 deletions

View File

@ -64,6 +64,7 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -197,6 +198,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY = public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY =
"hbase.hregion.scan.loadColumnFamiliesOnDemand"; "hbase.hregion.scan.loadColumnFamiliesOnDemand";
/** Config key for using mvcc pre-assign feature for put */
public static final String HREGION_MVCC_PRE_ASSIGN = "hbase.hregion.mvcc.preassign";
public static final boolean DEFAULT_HREGION_MVCC_PRE_ASSIGN = true;
/** /**
* This is the global default value for durability. All tables/mutations not * This is the global default value for durability. All tables/mutations not
* defining a durability or using USE_DEFAULT will default to this value. * defining a durability or using USE_DEFAULT will default to this value.
@ -585,6 +590,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// that has non-default scope // that has non-default scope
private final NavigableMap<byte[], Integer> replicationScope = new TreeMap<byte[], Integer>( private final NavigableMap<byte[], Integer> replicationScope = new TreeMap<byte[], Integer>(
Bytes.BYTES_COMPARATOR); Bytes.BYTES_COMPARATOR);
// flag and lock for MVCC preassign
private final boolean mvccPreAssign;
private final ReentrantLock preAssignMvccLock;
/** /**
* HRegion constructor. This constructor should only be used for testing and * HRegion constructor. This constructor should only be used for testing and
@ -744,6 +752,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
false : false :
conf.getBoolean(HConstants.ENABLE_CLIENT_BACKPRESSURE, conf.getBoolean(HConstants.ENABLE_CLIENT_BACKPRESSURE,
HConstants.DEFAULT_ENABLE_CLIENT_BACKPRESSURE); HConstants.DEFAULT_ENABLE_CLIENT_BACKPRESSURE);
// get mvcc pre-assign flag and lock
this.mvccPreAssign = conf.getBoolean(HREGION_MVCC_PRE_ASSIGN, DEFAULT_HREGION_MVCC_PRE_ASSIGN);
if (this.mvccPreAssign) {
this.preAssignMvccLock = new ReentrantLock();
} else {
this.preAssignMvccLock = null;
}
} }
void setHTableSpecificConf() { void setHTableSpecificConf() {
@ -3215,36 +3231,61 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// STEP 4. Append the final edit to WAL and sync. // STEP 4. Append the final edit to WAL and sync.
Mutation mutation = batchOp.getMutation(firstIndex); Mutation mutation = batchOp.getMutation(firstIndex);
WALKey walKey = null; WALKey walKey = null;
long txid;
if (replay) { if (replay) {
// use wal key from the original // use wal key from the original
walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(), walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(),
this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now,
mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc); mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc);
walKey.setOrigLogSeqNum(batchOp.getReplaySequenceId()); walKey.setOrigLogSeqNum(batchOp.getReplaySequenceId());
}
// Not sure what is going on here when replay is going on... does the below append get
// called for replayed edits? Am afraid to change it without test.
if (!walEdit.isEmpty()) { if (!walEdit.isEmpty()) {
if (!replay) { txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
if (txid != 0) {
sync(txid, durability);
}
}
} else {
try {
if (!walEdit.isEmpty()) {
try {
if (this.mvccPreAssign) {
preAssignMvccLock.lock();
writeEntry = mvcc.begin();
}
// we use HLogKey here instead of WALKey directly to support legacy coprocessors. // we use HLogKey here instead of WALKey directly to support legacy coprocessors.
walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(), walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(),
this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now,
mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc,
this.getReplicationScope()); this.getReplicationScope());
if (this.mvccPreAssign) {
walKey.setPreAssignedWriteEntry(writeEntry);
} }
// TODO: Use the doAppend methods below... complicated by the replay stuff above. // TODO: Use the doAppend methods below... complicated by the replay stuff above.
try { txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
long txid = this.wal.append(this.getRegionInfo(), walKey, } finally {
walEdit, true); if (mvccPreAssign) {
if (txid != 0) sync(txid, durability); preAssignMvccLock.unlock();
}
}
if (txid != 0) {
sync(txid, durability);
}
if (writeEntry == null) {
// if MVCC not preassigned, wait here until assigned
writeEntry = walKey.getWriteEntry(); writeEntry = walKey.getWriteEntry();
}
}
} catch (IOException ioe) { } catch (IOException ioe) {
if (walKey != null) mvcc.complete(walKey.getWriteEntry()); if (walKey != null && writeEntry == null) {
// the writeEntry is not preassigned and error occurred during append or sync
mvcc.complete(walKey.getWriteEntry());
}
throw ioe; throw ioe;
} }
} }
if (walKey == null) { if (walKey == null) {
// If no walKey, then skipping WAL or some such. Being an mvcc transaction so sequenceid. // If no walKey, then not in replay and skipping WAL or some such. Begin an MVCC transaction
// to get sequence id.
writeEntry = mvcc.begin(); writeEntry = mvcc.begin();
} }
@ -3267,7 +3308,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// STEP 6. Complete mvcc. // STEP 6. Complete mvcc.
if (replay) { if (replay) {
this.mvcc.advanceTo(batchOp.getReplaySequenceId()); this.mvcc.advanceTo(batchOp.getReplaySequenceId());
} else if (writeEntry != null/*Can be null if in replay mode*/) { } else {
// writeEntry won't be empty if not in replay mode
assert writeEntry != null;
mvcc.completeAndWait(writeEntry); mvcc.completeAndWait(writeEntry);
writeEntry = null; writeEntry = null;
} }
@ -7592,9 +7635,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
public static final long FIXED_OVERHEAD = ClassSize.align( public static final long FIXED_OVERHEAD = ClassSize.align(
ClassSize.OBJECT + ClassSize.OBJECT +
ClassSize.ARRAY + ClassSize.ARRAY +
49 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + 50 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT +
(14 * Bytes.SIZEOF_LONG) + (14 * Bytes.SIZEOF_LONG) +
5 * Bytes.SIZEOF_BOOLEAN); 6 * Bytes.SIZEOF_BOOLEAN);
// woefully out of date - currently missing: // woefully out of date - currently missing:
// 1 x HashMap - coprocessorServiceHandlers // 1 x HashMap - coprocessorServiceHandlers

View File

@ -112,11 +112,16 @@ class FSWALEntry extends Entry {
} }
stamped = true; stamped = true;
long regionSequenceId = WALKey.NO_SEQUENCE_ID; long regionSequenceId = WALKey.NO_SEQUENCE_ID;
MultiVersionConcurrencyControl mvcc = getKey().getMvcc(); WALKey key = getKey();
MultiVersionConcurrencyControl.WriteEntry we = null; MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry();
boolean preAssigned = (we != null);
if (!preAssigned) {
MultiVersionConcurrencyControl mvcc = key.getMvcc();
if (mvcc != null) { if (mvcc != null) {
we = mvcc.begin(); we = mvcc.begin();
}
}
if (we != null) {
regionSequenceId = we.getWriteNumber(); regionSequenceId = we.getWriteNumber();
} }
@ -125,7 +130,9 @@ class FSWALEntry extends Entry {
CellUtil.setSequenceId(c, regionSequenceId); CellUtil.setSequenceId(c, regionSequenceId);
} }
} }
getKey().setWriteEntry(we); if (!preAssigned) {
key.setWriteEntry(we);
}
return regionSequenceId; return regionSequenceId;
} }

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FamilyScope; import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FamilyScope;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ScopeType; import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ScopeType;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl; import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry;
import org.apache.hadoop.hbase.regionserver.SequenceId; import org.apache.hadoop.hbase.regionserver.SequenceId;
// imports for things that haven't moved from regionserver.wal yet. // imports for things that haven't moved from regionserver.wal yet.
import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; import org.apache.hadoop.hbase.regionserver.wal.CompressionContext;
@ -92,6 +93,10 @@ public class WALKey implements SequenceId, Comparable<WALKey> {
*/ */
@InterfaceAudience.Private // For internal use only. @InterfaceAudience.Private // For internal use only.
public MultiVersionConcurrencyControl.WriteEntry getWriteEntry() throws InterruptedIOException { public MultiVersionConcurrencyControl.WriteEntry getWriteEntry() throws InterruptedIOException {
if (this.preAssignedWriteEntry != null) {
// don't wait for seqNumAssignedLatch if writeEntry is preassigned
return this.preAssignedWriteEntry;
}
try { try {
this.sequenceIdAssignedLatch.await(); this.sequenceIdAssignedLatch.await();
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
@ -203,6 +208,7 @@ public class WALKey implements SequenceId, Comparable<WALKey> {
* Set in a way visible to multiple threads; e.g. synchronized getter/setters. * Set in a way visible to multiple threads; e.g. synchronized getter/setters.
*/ */
private MultiVersionConcurrencyControl.WriteEntry writeEntry; private MultiVersionConcurrencyControl.WriteEntry writeEntry;
private MultiVersionConcurrencyControl.WriteEntry preAssignedWriteEntry = null;
public static final List<UUID> EMPTY_UUIDS = Collections.unmodifiableList(new ArrayList<UUID>()); public static final List<UUID> EMPTY_UUIDS = Collections.unmodifiableList(new ArrayList<UUID>());
// visible for deprecated HLogKey // visible for deprecated HLogKey
@ -731,4 +737,24 @@ public class WALKey implements SequenceId, Comparable<WALKey> {
this.origLogSeqNum = walKey.getOrigSequenceNumber(); this.origLogSeqNum = walKey.getOrigSequenceNumber();
} }
} }
/**
 * Returns the MVCC write entry that was handed to this key through
 * {@link #setPreAssignedWriteEntry(WriteEntry)}.
 * @return the preassigned write entry, or {@code null} when none has been set
 */
@InterfaceAudience.Private // For internal use only.
public MultiVersionConcurrencyControl.WriteEntry getPreAssignedWriteEntry() {
  return preAssignedWriteEntry;
}
/**
 * Records a write entry on this key and adopts its write number as the key's
 * sequence id. A {@code null} argument is ignored and leaves the key unchanged.
 * @param writeEntry the entry to preassign; may be {@code null}, in which case nothing happens
 */
@InterfaceAudience.Private // For internal use only.
public void setPreAssignedWriteEntry(WriteEntry writeEntry) {
  if (writeEntry == null) {
    // Nothing to preassign: keep whatever entry and sequence id the key already holds.
    return;
  }
  this.preAssignedWriteEntry = writeEntry;
  // The sequence id of the key mirrors the write number of the preassigned entry.
  this.sequenceId = writeEntry.getWriteNumber();
}
} }

View File

@ -6290,8 +6290,11 @@ public class TestHRegion {
@Override @Override
public Long answer(InvocationOnMock invocation) throws Throwable { public Long answer(InvocationOnMock invocation) throws Throwable {
WALKey key = invocation.getArgumentAt(1, WALKey.class); WALKey key = invocation.getArgumentAt(1, WALKey.class);
MultiVersionConcurrencyControl.WriteEntry we = key.getMvcc().begin(); MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry();
if (we == null) {
we = key.getMvcc().begin();
key.setWriteEntry(we); key.setWriteEntry(we);
}
return 1L; return 1L;
} }