HBASE-17158 Avoid deadlock caused by HRegion#doDelta (ChiaPing Tsai)
commit 9f5b8a83b7 (parent 511398f43a)
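For context, here is a minimal standalone sketch (not HBase code; the class and helper names are invented for illustration) of the locking pattern this patch moves HRegion#doDelta to: the row lock is acquired inside the try block and released in the finally block only if it was actually obtained, so a failed row-lock acquisition can no longer leave the region's close lock held and deadlock HRegion#doClose.

import java.io.IOException;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class RowLockPatternSketch {

  // Hypothetical stand-in for the region close lock taken by startRegionOperation().
  private final ReentrantReadWriteLock closeLock = new ReentrantReadWriteLock();

  // Hypothetical stand-in for getRowLockInternal(); pretend it times out and throws.
  private AutoCloseable acquireRowLock() throws IOException {
    throw new IOException("Timed out waiting for the row lock");
  }

  public void doDeltaLikeOperation() throws IOException {
    closeLock.readLock().lock();            // like startRegionOperation(op)
    AutoCloseable rowLock = null;           // like: RowLock rowLock = null;
    try {
      rowLock = acquireRowLock();           // like: rowLock = getRowLockInternal(row, false);
      // ... apply the increment/append under the row lock ...
    } finally {
      if (rowLock != null) {                // release only what was actually acquired
        try {
          rowLock.close();
        } catch (Exception ignored) {
        }
      }
      closeLock.readLock().unlock();        // like closeRegionOperation(op)
    }
  }

  public static void main(String[] args) {
    RowLockPatternSketch sketch = new RowLockPatternSketch();
    try {
      sketch.doDeltaLikeOperation();
    } catch (IOException expected) {
      // The row lock could not be taken, but the close lock was still released,
      // so a close() would not block forever waiting for the write lock.
      System.out.println("write lock available: " + sketch.closeLock.writeLock().tryLock());
    }
  }
}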
HRegion.java

@@ -349,6 +349,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
 
   private volatile Optional<ConfigurationManager> configurationManager;
 
+  // Used for testing.
+  private volatile Long timeoutForWriteLock = null;
+
   /**
    * @return The smallest mvcc readPoint across all the scanners in this
    * region. Writes older than this readPoint, are included in every
@@ -1446,6 +1449,17 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
     this.closing.set(closing);
   }
 
+  /**
+   * Without a timeout, {@link HRegion#doClose} blocks forever when a test reproduces the deadlock.
+   * Setting a timeout makes {@link HRegion#doClose} throw an exception instead of blocking.
+   * @param timeoutForWriteLock the time, in seconds, to wait for the write lock in {@link HRegion#doClose}
+   */
+  @VisibleForTesting
+  public void setTimeoutForWriteLock(long timeoutForWriteLock) {
+    assert timeoutForWriteLock >= 0;
+    this.timeoutForWriteLock = timeoutForWriteLock;
+  }
+
   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="UL_UNRELEASED_LOCK_EXCEPTION_PATH",
     justification="I think FindBugs is confused")
   private Map<byte[], List<StoreFile>> doClose(final boolean abort, MonitoredTask status)
@@ -1484,8 +1498,20 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
       }
     }
 
-    // block waiting for the lock for closing
-    lock.writeLock().lock(); // FindBugs: Complains UL_UNRELEASED_LOCK_EXCEPTION_PATH but seems fine
+    if (timeoutForWriteLock == null
+        || timeoutForWriteLock == Long.MAX_VALUE) {
+      // block waiting for the lock for closing
+      lock.writeLock().lock(); // FindBugs: Complains UL_UNRELEASED_LOCK_EXCEPTION_PATH but seems fine
+    } else {
+      try {
+        boolean succeed = lock.writeLock().tryLock(timeoutForWriteLock, TimeUnit.SECONDS);
+        if (!succeed) {
+          throw new IOException("Failed to get write lock when closing region");
+        }
+      } catch (InterruptedException e) {
+        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
+      }
+    }
     this.closing.set(true);
     status.setStatus("Disabling writes for close");
     try {
@@ -5345,6 +5371,11 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
     }
   }
 
+  @VisibleForTesting
+  public int getReadLockCount() {
+    return lock.getReadLockCount();
+  }
+
   public ConcurrentHashMap<HashedBytes, RowLockContext> getLockedRows() {
     return lockedRows;
   }
@@ -7257,9 +7288,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
     WriteEntry writeEntry = null;
     startRegionOperation(op);
     List<Cell> results = returnResults? new ArrayList<Cell>(mutation.size()): null;
-    RowLock rowLock = getRowLockInternal(mutation.getRow(), false);
+    RowLock rowLock = null;
    MemstoreSize memstoreSize = new MemstoreSize();
     try {
+      rowLock = getRowLockInternal(mutation.getRow(), false);
       lock(this.updatesLock.readLock());
       try {
         Result cpResult = doCoprocessorPreCall(op, mutation);
@@ -7307,7 +7339,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
       // the client. Means only way to read-your-own-increment or append is to come in with an
       // a 0 increment.
       if (writeEntry != null) mvcc.complete(writeEntry);
-      rowLock.release();
+      if (rowLock != null) {
+        rowLock.release();
+      }
       // Request a cache flush if over the limit. Do it outside update lock.
       if (isFlushSize(addAndGetMemstoreSize(memstoreSize))) {
         requestFlush();
TestFromClientSide3.java

@@ -23,14 +23,11 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -57,7 +54,6 @@ import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -567,6 +563,59 @@ public class TestFromClientSide3 {
     TEST_UTIL.deleteTable(tableName);
   }
 
+  @Test(timeout = 30000)
+  public void testLockLeakWithDelta() throws Exception, Throwable {
+    TableName tableName = TableName.valueOf("testLockLeakWithDelta");
+    HTableDescriptor desc = new HTableDescriptor(tableName);
+    desc.addCoprocessor(WatiingForMultiMutationsObserver.class.getName());
+    desc.setConfiguration("hbase.rowlock.wait.duration", String.valueOf(5000));
+    desc.addFamily(new HColumnDescriptor(FAMILY));
+    TEST_UTIL.getAdmin().createTable(desc);
+    // Create a new connection with a lower retry number.
+    Configuration copy = new Configuration(TEST_UTIL.getConfiguration());
+    copy.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
+    try (Connection con = ConnectionFactory.createConnection(copy)) {
+      HRegion region = (HRegion) find(tableName);
+      region.setTimeoutForWriteLock(10);
+      ExecutorService putService = Executors.newSingleThreadExecutor();
+      putService.execute(() -> {
+        try (Table table = con.getTable(tableName)) {
+          Put put = new Put(ROW);
+          put.addColumn(FAMILY, QUALIFIER, VALUE);
+          // The put will be blocked by WatiingForMultiMutationsObserver.
+          table.put(put);
+        } catch (IOException ex) {
+          throw new RuntimeException(ex);
+        }
+      });
+      ExecutorService appendService = Executors.newSingleThreadExecutor();
+      appendService.execute(() -> {
+        Append append = new Append(ROW);
+        append.add(FAMILY, QUALIFIER, VALUE);
+        try (Table table = con.getTable(tableName)) {
+          table.append(append);
+          fail("The APPEND should fail because the target lock is blocked by the previous put");
+        } catch (Throwable ex) {
+        }
+      });
+      appendService.shutdown();
+      appendService.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
+      WatiingForMultiMutationsObserver observer = find(tableName, WatiingForMultiMutationsObserver.class);
+      observer.latch.countDown();
+      putService.shutdown();
+      putService.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
+      try (Table table = con.getTable(tableName)) {
+        Result r = table.get(new Get(ROW));
+        assertFalse(r.isEmpty());
+        assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), VALUE));
+      }
+    }
+    HRegion region = (HRegion) find(tableName);
+    int readLockCount = region.getReadLockCount();
+    LOG.info("readLockCount:" + readLockCount);
+    assertEquals(0, readLockCount);
+  }
+
   @Test(timeout = 30000)
   public void testMultiRowMutations() throws Exception, Throwable {
     TableName tableName = TableName.valueOf("testMultiRowMutations");
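For reference, a minimal standalone sketch (not HBase code; the class and method names are invented for illustration) of the timed write-lock acquisition that the doClose() change above introduces: block indefinitely when no timeout is configured, otherwise give up with an IOException once tryLock times out. This is what lets the unit test fail fast on a leaked read lock instead of hanging.

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class TimedCloseLockSketch {

  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

  void acquireCloseLock(Long timeoutSeconds) throws IOException {
    if (timeoutSeconds == null || timeoutSeconds == Long.MAX_VALUE) {
      lock.writeLock().lock();                       // old behaviour: wait forever
      return;
    }
    try {
      if (!lock.writeLock().tryLock(timeoutSeconds, TimeUnit.SECONDS)) {
        throw new IOException("Failed to get write lock when closing region");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
  }

  public static void main(String[] args) throws Exception {
    TimedCloseLockSketch sketch = new TimedCloseLockSketch();
    // Simulate a leaked read lock held by another thread.
    Thread holder = new Thread(() -> {
      sketch.lock.readLock().lock();
      try {
        TimeUnit.SECONDS.sleep(5);
      } catch (InterruptedException ignored) {
      } finally {
        sketch.lock.readLock().unlock();
      }
    });
    holder.start();
    TimeUnit.MILLISECONDS.sleep(100);                // let the holder grab the read lock
    try {
      sketch.acquireCloseLock(1L);                   // times out instead of blocking forever
    } catch (IOException expected) {
      System.out.println(expected.getMessage());
    }
    holder.join();
  }
}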