diff --git a/CHANGES.txt b/CHANGES.txt index 0d59e27c780..f2c2850ab81 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -325,6 +325,7 @@ Release 0.21.0 - Unreleased HBASE-2382 Don't rely on fs.getDefaultReplication() to roll HLogs (Nicolas Spiegelberg via Stack) HBASE-2415 Disable META splitting in 0.20 (Todd Lipcon via Stack) + HBASE-2421 Put hangs for 10 retries on failed region servers IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java b/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java index 29b83245615..853164dfbdf 100644 --- a/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java +++ b/core/src/main/java/org/apache/hadoop/hbase/client/HConnection.java @@ -192,7 +192,7 @@ public interface HConnection { * @throws IOException if a remote or network exception occurs * @throws RuntimeException other unspecified error */ - public T getRegionServerForWithoutRetries(ServerCallable callable) + public T getRegionServerWithoutRetries(ServerCallable callable) throws IOException, RuntimeException; diff --git a/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java b/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java index 0205076a3d7..a83ce721ab5 100644 --- a/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java +++ b/core/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java @@ -680,7 +680,7 @@ public class HConnectionManager implements HConstants { // This block guards against two threads trying to load the meta // region at the same time. The first will load the meta region and // the second will use the value that the first one found. - synchronized(regionLockObject) { + synchronized (regionLockObject) { // Check the cache again for a hit in case some other thread made the // same query while we were waiting on the lock. If not supposed to // be using the cache, delete any existing cached location so it won't @@ -1077,15 +1077,19 @@ public class HConnectionManager implements HConstants { return null; } - public T getRegionServerForWithoutRetries(ServerCallable callable) + public T getRegionServerWithoutRetries(ServerCallable callable) throws IOException, RuntimeException { try { callable.instantiateServer(false); return callable.call(); } catch (Throwable t) { - t = translateException(t); + Throwable t2 = translateException(t); + if (t2 instanceof IOException) { + throw (IOException)t2; + } else { + throw new RuntimeException(t2); + } } - return null; } @SuppressWarnings({"ConstantConditions"}) @@ -1299,9 +1303,25 @@ public class HConnectionManager implements HConstants { } } - @SuppressWarnings({"ConstantConditions"}) + /** + * Process a batch of Puts on the given executor service. + * + * @param list the puts to make - successful puts will be removed. + * @param pool thread pool to execute requests on + * + * In the case of an exception, we take different actions depending on the + * situation: + * - If the exception is a DoNotRetryException, we rethrow it and leave the + * 'list' parameter in an indeterminate state. + * - If the 'list' parameter is a singleton, we directly throw the specific + * exception for that put. + * - Otherwise, we throw a generic exception indicating that an error occurred. + * The 'list' parameter is mutated to contain those puts that did not succeed. + */ public void processBatchOfPuts(List list, final byte[] tableName, ExecutorService pool) throws IOException { + boolean singletonList = list.size() == 1; + Throwable singleRowCause = null; for ( int tries = 0 ; tries < numRetries && !list.isEmpty(); ++tries) { Collections.sort(list); Map regionPuts = @@ -1367,10 +1387,19 @@ public class HConnectionManager implements HConstants { LOG.debug("Failed all from " + request.address, e); failed.addAll(request.allPuts()); } catch (ExecutionException e) { - System.out.println(e); // all go into the failed list. LOG.debug("Failed all from " + request.address, e); failed.addAll(request.allPuts()); + + // Just give up, leaving the batch put list in an untouched/semi-committed state + if (e.getCause() instanceof DoNotRetryIOException) { + throw (DoNotRetryIOException) e.getCause(); + } + + if (singletonList) { + // be richer for reporting in a 1 row case. + singleRowCause = e.getCause(); + } } } list.clear(); @@ -1391,9 +1420,13 @@ public class HConnectionManager implements HConstants { } } if (!list.isEmpty()) { + if (singletonList && singleRowCause != null) { + throw new IOException(singleRowCause); + } + // ran out of retries and didnt succeed everything! throw new RetriesExhaustedException("Still had " + list.size() + " puts left after retrying " + - numRetries + " times. Should have detail on which Regions failed the most"); + numRetries + " times."); } } @@ -1404,7 +1437,7 @@ public class HConnectionManager implements HConstants { final HConnection connection = this; return new Callable() { public MultiPutResponse call() throws IOException { - return getRegionServerWithRetries( + return getRegionServerWithoutRetries( new ServerCallable(connection, tableName, null) { public MultiPutResponse call() throws IOException { MultiPutResponse resp = server.multiPut(puts); diff --git a/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java b/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java index 5775a44b585..7ec95cb8723 100644 --- a/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java +++ b/core/src/main/java/org/apache/hadoop/hbase/client/HTable.java @@ -542,8 +542,8 @@ public class HTable implements HTableInterface { } finally { // the write buffer was adjusted by processBatchOfPuts currentWriteBufferSize = 0; - for (Put aWriteBuffer : writeBuffer) { - currentWriteBufferSize += aWriteBuffer.heapSize(); + for (Put aPut : writeBuffer) { + currentWriteBufferSize += aPut.heapSize(); } } } diff --git a/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index e6c8bc62e68..82e5771ce93 100644 --- a/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/core/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -233,7 +233,7 @@ public class HRegionServer implements HConstants, HRegionInterface, // Run HDFS shutdown on exit if this is set. We clear this out when // doing a restart() to prevent closing of HDFS. - private final AtomicBoolean shutdownHDFS = new AtomicBoolean(true); + public final AtomicBoolean shutdownHDFS = new AtomicBoolean(true); private final String machineName; diff --git a/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java b/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java index 89cc86920d2..43924e87b2b 100644 --- a/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java +++ b/core/src/test/java/org/apache/hadoop/hbase/TestMultiParallelPut.java @@ -20,15 +20,15 @@ package org.apache.hadoop.hbase; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.util.Bytes; -import java.util.List; import java.util.ArrayList; +import java.util.List; public class TestMultiParallelPut extends MultiRegionTable { private static final byte[] VALUE = Bytes.toBytes("value"); @@ -58,7 +58,14 @@ public class TestMultiParallelPut extends MultiRegionTable { List keys = new ArrayList(); - public void testMultiPut() throws Exception { + public void testParallelPut() throws Exception { + doATest(false); + } + public void testParallelPutWithRSAbort() throws Exception { + doATest(true); + } + + public void doATest(boolean doAbort) throws Exception { HTable table = new HTable(TEST_TABLE); table.setAutoFlush(false); @@ -73,6 +80,19 @@ public class TestMultiParallelPut extends MultiRegionTable { table.flushCommits(); + if (doAbort) { + cluster.abortRegionServer(0); + + // try putting more keys after the abort. + for ( byte [] k : keys ) { + Put put = new Put(k); + put.add(BYTES_FAMILY, QUALIFIER, VALUE); + + table.put(put); + } + table.flushCommits(); + } + for (byte [] k : keys ) { Get get = new Get(k); get.addColumn(BYTES_FAMILY, QUALIFIER); @@ -88,10 +108,15 @@ public class TestMultiParallelPut extends MultiRegionTable { HBaseAdmin admin = new HBaseAdmin(conf); ClusterStatus cs = admin.getClusterStatus(); - assertEquals(2, cs.getServers()); + int expectedServerCount = 2; + if (doAbort) + expectedServerCount = 1; + + assertEquals(expectedServerCount, cs.getServers()); for ( HServerInfo info : cs.getServerInfo()) { System.out.println(info); assertTrue( info.getLoad().getNumberOfRegions() > 10); } } + }