From 9b13514483991889cd6ebe097c3c8eb0e7983e6d Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Thu, 13 Oct 2016 09:25:23 -0700 Subject: [PATCH] HBASE-16698 Performance issue: handlers stuck waiting for CountDownLatch inside WALKey#getWriteEntry under high writing workload --- .../hadoop/hbase/regionserver/HRegion.java | 83 ++++++++++++++----- .../hbase/regionserver/wal/FSWALEntry.java | 19 +++-- .../org/apache/hadoop/hbase/wal/WALKey.java | 26 ++++++ .../hbase/regionserver/TestHRegion.java | 7 +- 4 files changed, 107 insertions(+), 28 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index ccc92d1eff8..3715ca10d8d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -64,6 +64,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; @@ -197,6 +198,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY = "hbase.hregion.scan.loadColumnFamiliesOnDemand"; + /** Config key for using mvcc pre-assign feature for put */ + public static final String HREGION_MVCC_PRE_ASSIGN = "hbase.hregion.mvcc.preassign"; + public static final boolean DEFAULT_HREGION_MVCC_PRE_ASSIGN = true; + /** * This is the global default value for durability. All tables/mutations not * defining a durability or using USE_DEFAULT will default to this value. @@ -585,6 +590,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // that has non-default scope private final NavigableMap replicationScope = new TreeMap( Bytes.BYTES_COMPARATOR); + // flag and lock for MVCC preassign + private final boolean mvccPreAssign; + private final ReentrantLock preAssignMvccLock; /** * HRegion constructor. This constructor should only be used for testing and @@ -744,6 +752,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi false : conf.getBoolean(HConstants.ENABLE_CLIENT_BACKPRESSURE, HConstants.DEFAULT_ENABLE_CLIENT_BACKPRESSURE); + + // get mvcc pre-assign flag and lock + this.mvccPreAssign = conf.getBoolean(HREGION_MVCC_PRE_ASSIGN, DEFAULT_HREGION_MVCC_PRE_ASSIGN); + if (this.mvccPreAssign) { + this.preAssignMvccLock = new ReentrantLock(); + } else { + this.preAssignMvccLock = null; + } } void setHTableSpecificConf() { @@ -3215,36 +3231,61 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // STEP 4. Append the final edit to WAL and sync. Mutation mutation = batchOp.getMutation(firstIndex); WALKey walKey = null; + long txid; if (replay) { // use wal key from the original walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(), this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc); walKey.setOrigLogSeqNum(batchOp.getReplaySequenceId()); - } - // Not sure what is going on here when replay is going on... does the below append get - // called for replayed edits? Am afraid to change it without test. - if (!walEdit.isEmpty()) { - if (!replay) { - // we use HLogKey here instead of WALKey directly to support legacy coprocessors. - walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(), - this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, - mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, - this.getReplicationScope()); + if (!walEdit.isEmpty()) { + txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true); + if (txid != 0) { + sync(txid, durability); + } } - // TODO: Use the doAppend methods below... complicated by the replay stuff above. + } else { try { - long txid = this.wal.append(this.getRegionInfo(), walKey, - walEdit, true); - if (txid != 0) sync(txid, durability); - writeEntry = walKey.getWriteEntry(); + if (!walEdit.isEmpty()) { + try { + if (this.mvccPreAssign) { + preAssignMvccLock.lock(); + writeEntry = mvcc.begin(); + } + // we use HLogKey here instead of WALKey directly to support legacy coprocessors. + walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(), + this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, + mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, + this.getReplicationScope()); + if (this.mvccPreAssign) { + walKey.setPreAssignedWriteEntry(writeEntry); + } + // TODO: Use the doAppend methods below... complicated by the replay stuff above. + txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true); + } finally { + if (mvccPreAssign) { + preAssignMvccLock.unlock(); + } + } + if (txid != 0) { + sync(txid, durability); + } + if (writeEntry == null) { + // if MVCC not preassigned, wait here until assigned + writeEntry = walKey.getWriteEntry(); + } + } } catch (IOException ioe) { - if (walKey != null) mvcc.complete(walKey.getWriteEntry()); + if (walKey != null && writeEntry == null) { + // the writeEntry is not preassigned and error occurred during append or sync + mvcc.complete(walKey.getWriteEntry()); + } throw ioe; } } if (walKey == null) { - // If no walKey, then skipping WAL or some such. Being an mvcc transaction so sequenceid. + // If no walKey, then not in replay and skipping WAL or some such. Begin an MVCC transaction + // to get sequence id. writeEntry = mvcc.begin(); } @@ -3267,7 +3308,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // STEP 6. Complete mvcc. if (replay) { this.mvcc.advanceTo(batchOp.getReplaySequenceId()); - } else if (writeEntry != null/*Can be null if in replay mode*/) { + } else { + // writeEntry won't be empty if not in replay mode + assert writeEntry != null; mvcc.completeAndWait(writeEntry); writeEntry = null; } @@ -7592,9 +7635,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT + ClassSize.ARRAY + - 49 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + + 50 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + (14 * Bytes.SIZEOF_LONG) + - 5 * Bytes.SIZEOF_BOOLEAN); + 6 * Bytes.SIZEOF_BOOLEAN); // woefully out of date - currently missing: // 1 x HashMap - coprocessorServiceHandlers diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java index 72474a007d0..c4546f5bdc9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSWALEntry.java @@ -112,11 +112,16 @@ class FSWALEntry extends Entry { } stamped = true; long regionSequenceId = WALKey.NO_SEQUENCE_ID; - MultiVersionConcurrencyControl mvcc = getKey().getMvcc(); - MultiVersionConcurrencyControl.WriteEntry we = null; - - if (mvcc != null) { - we = mvcc.begin(); + WALKey key = getKey(); + MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry(); + boolean preAssigned = (we != null); + if (!preAssigned) { + MultiVersionConcurrencyControl mvcc = key.getMvcc(); + if (mvcc != null) { + we = mvcc.begin(); + } + } + if (we != null) { regionSequenceId = we.getWriteNumber(); } @@ -125,7 +130,9 @@ class FSWALEntry extends Entry { CellUtil.setSequenceId(c, regionSequenceId); } } - getKey().setWriteEntry(we); + if (!preAssigned) { + key.setWriteEntry(we); + } return regionSequenceId; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java index 0e35cbebb60..4f2af38d6ea 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FamilyScope; import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ScopeType; import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl; +import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry; import org.apache.hadoop.hbase.regionserver.SequenceId; // imports for things that haven't moved from regionserver.wal yet. import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; @@ -92,6 +93,10 @@ public class WALKey implements SequenceId, Comparable { */ @InterfaceAudience.Private // For internal use only. public MultiVersionConcurrencyControl.WriteEntry getWriteEntry() throws InterruptedIOException { + if (this.preAssignedWriteEntry != null) { + // don't wait for seqNumAssignedLatch if writeEntry is preassigned + return this.preAssignedWriteEntry; + } try { this.sequenceIdAssignedLatch.await(); } catch (InterruptedException ie) { @@ -203,6 +208,7 @@ public class WALKey implements SequenceId, Comparable { * Set in a way visible to multiple threads; e.g. synchronized getter/setters. */ private MultiVersionConcurrencyControl.WriteEntry writeEntry; + private MultiVersionConcurrencyControl.WriteEntry preAssignedWriteEntry = null; public static final List EMPTY_UUIDS = Collections.unmodifiableList(new ArrayList()); // visible for deprecated HLogKey @@ -731,4 +737,24 @@ public class WALKey implements SequenceId, Comparable { this.origLogSeqNum = walKey.getOrigSequenceNumber(); } } + + /** + * @return The preassigned writeEntry, if any + */ + @InterfaceAudience.Private // For internal use only. + public MultiVersionConcurrencyControl.WriteEntry getPreAssignedWriteEntry() { + return this.preAssignedWriteEntry; + } + + /** + * Preassign writeEntry + * @param writeEntry the entry to assign + */ + @InterfaceAudience.Private // For internal use only. + public void setPreAssignedWriteEntry(WriteEntry writeEntry) { + if (writeEntry != null) { + this.preAssignedWriteEntry = writeEntry; + this.sequenceId = writeEntry.getWriteNumber(); + } + } } \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index 612d6cfb50f..0b8eefae54f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -6290,8 +6290,11 @@ public class TestHRegion { @Override public Long answer(InvocationOnMock invocation) throws Throwable { WALKey key = invocation.getArgumentAt(1, WALKey.class); - MultiVersionConcurrencyControl.WriteEntry we = key.getMvcc().begin(); - key.setWriteEntry(we); + MultiVersionConcurrencyControl.WriteEntry we = key.getPreAssignedWriteEntry(); + if (we == null) { + we = key.getMvcc().begin(); + key.setWriteEntry(we); + } return 1L; }