diff --git a/CHANGES.txt b/CHANGES.txt index bae6ff12dd4..a762cd981a3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -169,6 +169,8 @@ Trunk (unreleased changes) HBASE-703 Invalid regions listed by regionserver.jsp (Izaak Rubin via Stack) HBASE-674 Memcache size unreliable HBASE-726 Unit tests won't run because of a typo (Sebastien Rainville via Stack) + HBASE-727 Client caught in an infinite loop when trying to connect to cached + server locations (Izaak Rubin via Stack) IMPROVEMENTS HBASE-559 MR example job to count table rows diff --git a/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java b/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java index 8dbb2cc8b9c..9d6a425d857 100644 --- a/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java +++ b/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java @@ -118,6 +118,7 @@ public class HConnectionManager implements HConstants { private final Class serverInterfaceClass; private final long pause; private final int numRetries; + private final int maxRPCAttempts; private final Integer masterLock = new Integer(0); private volatile boolean closed; @@ -164,6 +165,7 @@ public class HConnectionManager implements HConstants { this.pause = conf.getLong("hbase.client.pause", 30 * 1000); this.numRetries = conf.getInt("hbase.client.retries.number", 5); + this.maxRPCAttempts = conf.getInt("hbase.client.rpc.maxattempts", 1); this.master = null; this.masterChecked = false; @@ -761,7 +763,8 @@ public class HConnectionManager implements HConstants { try { server = (HRegionInterface)HbaseRPC.waitForProxy(serverInterfaceClass, - versionId, regionServer.getInetSocketAddress(), this.conf); + versionId, regionServer.getInetSocketAddress(), this.conf, + this.maxRPCAttempts); } catch (RemoteException e) { throw RemoteExceptionHandler.decodeRemoteException(e); } diff --git a/src/java/org/apache/hadoop/hbase/ipc/HbaseRPC.java b/src/java/org/apache/hadoop/hbase/ipc/HbaseRPC.java index 
30c2b839cea..592d9e917bb 100644 --- a/src/java/org/apache/hadoop/hbase/ipc/HbaseRPC.java +++ b/src/java/org/apache/hadoop/hbase/ipc/HbaseRPC.java @@ -28,6 +28,7 @@ import java.net.ConnectException; import java.net.InetSocketAddress; import java.net.SocketTimeoutException; import java.io.*; +import java.util.ArrayList; import java.util.Map; import java.util.HashMap; import java.util.Collection; @@ -36,6 +37,7 @@ import javax.net.SocketFactory; import org.apache.commons.logging.*; +import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.io.HbaseObjectWritable; import org.apache.hadoop.io.*; import org.apache.hadoop.ipc.Client; @@ -273,16 +275,28 @@ public class HbaseRPC { } } + /** + * @param maxAttempts the number of getProxy() calls that may fail with a ConnectException before + * giving up (0 behaves like 1; timeouts are not counted). If negative, it will retry indefinitely. + */ public static VersionedProtocol waitForProxy(Class protocol, long clientVersion, InetSocketAddress addr, - Configuration conf - ) throws IOException { - while (true) { + Configuration conf, + int maxAttempts) throws IOException { + int reconnectAttempts = 0; + while (true) { try { return getProxy(protocol, clientVersion, addr, conf); } catch(ConnectException se) { // namenode has not been started LOG.info("Server at " + addr + " not available yet, Zzzzz..."); + if (maxAttempts >= 0 && ++reconnectAttempts >= maxAttempts) { + LOG.info("Server at " + addr + " could not be reached after " + + reconnectAttempts + " tries, giving up."); + throw new RetriesExhaustedException(addr.toString(), "unknown".getBytes(), + "unknown".getBytes(), reconnectAttempts - 1, + new ArrayList()); + } } catch(SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server: " + addr); } diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 7722af9dad7..e0c6af5fbe2 100644 --- 
a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -700,11 +700,11 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { LOG.debug("Telling master at " + conf.get(MASTER_ADDRESS) + " that we are up"); } - // Do initial RPC setup. + // Do initial RPC setup. The final argument indicates that the RPC should retry indefinitely. this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy( HMasterRegionInterface.class, HMasterRegionInterface.versionID, new HServerAddress(conf.get(MASTER_ADDRESS)).getInetSocketAddress(), - this.conf); + this.conf, -1); MapWritable result = null; long lastMsg = 0; while(!stopRequested.get()) {