HBASE-727 Client caught in an infinite loop when trying to connect to cached server locations

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@675048 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-07-08 23:20:02 +00:00
parent 6fe7bfb481
commit b022dd6a56
4 changed files with 25 additions and 6 deletions

View File

@ -169,6 +169,8 @@ Trunk (unreleased changes)
HBASE-703 Invalid regions listed by regionserver.jsp (Izaak Rubin via Stack) HBASE-703 Invalid regions listed by regionserver.jsp (Izaak Rubin via Stack)
HBASE-674 Memcache size unreliable HBASE-674 Memcache size unreliable
HBASE-726 Unit tests won't run because of a typo (Sebastien Rainville via Stack) HBASE-726 Unit tests won't run because of a typo (Sebastien Rainville via Stack)
HBASE-727 Client caught in an infinite loop when trying to connect to cached
server locations (Izaak Rubin via Stack)
IMPROVEMENTS IMPROVEMENTS
HBASE-559 MR example job to count table rows HBASE-559 MR example job to count table rows

View File

@ -118,6 +118,7 @@ public class HConnectionManager implements HConstants {
private final Class<? extends HRegionInterface> serverInterfaceClass; private final Class<? extends HRegionInterface> serverInterfaceClass;
private final long pause; private final long pause;
private final int numRetries; private final int numRetries;
private final int maxRPCAttempts;
private final Integer masterLock = new Integer(0); private final Integer masterLock = new Integer(0);
private volatile boolean closed; private volatile boolean closed;
@ -164,6 +165,7 @@ public class HConnectionManager implements HConstants {
this.pause = conf.getLong("hbase.client.pause", 30 * 1000); this.pause = conf.getLong("hbase.client.pause", 30 * 1000);
this.numRetries = conf.getInt("hbase.client.retries.number", 5); this.numRetries = conf.getInt("hbase.client.retries.number", 5);
this.maxRPCAttempts = conf.getInt("hbase.client.rpc.maxattempts", 1);
this.master = null; this.master = null;
this.masterChecked = false; this.masterChecked = false;
@ -761,7 +763,8 @@ public class HConnectionManager implements HConstants {
try { try {
server = (HRegionInterface)HbaseRPC.waitForProxy(serverInterfaceClass, server = (HRegionInterface)HbaseRPC.waitForProxy(serverInterfaceClass,
versionId, regionServer.getInetSocketAddress(), this.conf); versionId, regionServer.getInetSocketAddress(), this.conf,
this.maxRPCAttempts);
} catch (RemoteException e) { } catch (RemoteException e) {
throw RemoteExceptionHandler.decodeRemoteException(e); throw RemoteExceptionHandler.decodeRemoteException(e);
} }

View File

@ -28,6 +28,7 @@ import java.net.ConnectException;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.net.SocketTimeoutException; import java.net.SocketTimeoutException;
import java.io.*; import java.io.*;
import java.util.ArrayList;
import java.util.Map; import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
import java.util.Collection; import java.util.Collection;
@ -36,6 +37,7 @@ import javax.net.SocketFactory;
import org.apache.commons.logging.*; import org.apache.commons.logging.*;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.io.HbaseObjectWritable; import org.apache.hadoop.hbase.io.HbaseObjectWritable;
import org.apache.hadoop.io.*; import org.apache.hadoop.io.*;
import org.apache.hadoop.ipc.Client; import org.apache.hadoop.ipc.Client;
@ -273,16 +275,28 @@ public class HbaseRPC {
} }
} }
/**
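* @param maxAttempts the maximum number of times getProxy() is tried before
* giving up with a RetriesExhaustedException. Only attempts that fail with a
* ConnectException count toward this limit. If a negative number is passed,
* it will retry indefinitely; 0 behaves like 1 (a single attempt).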
*/
public static VersionedProtocol waitForProxy(Class protocol, public static VersionedProtocol waitForProxy(Class protocol,
long clientVersion, long clientVersion,
InetSocketAddress addr, InetSocketAddress addr,
Configuration conf Configuration conf,
) throws IOException { int maxAttempts) throws IOException {
while (true) { int reconnectAttempts = 0;
while (true) {
try { try {
return getProxy(protocol, clientVersion, addr, conf); return getProxy(protocol, clientVersion, addr, conf);
} catch(ConnectException se) { // namenode has not been started } catch(ConnectException se) { // namenode has not been started
LOG.info("Server at " + addr + " not available yet, Zzzzz..."); LOG.info("Server at " + addr + " not available yet, Zzzzz...");
if (maxAttempts >= 0 && ++reconnectAttempts >= maxAttempts) {
LOG.info("Server at " + addr + " could not be reached after " +
reconnectAttempts + " tries, giving up.");
throw new RetriesExhaustedException(addr.toString(), "unknown".getBytes(),
"unknown".getBytes(), reconnectAttempts - 1,
new ArrayList<Throwable>());
}
} catch(SocketTimeoutException te) { // namenode is busy } catch(SocketTimeoutException te) { // namenode is busy
LOG.info("Problem connecting to server: " + addr); LOG.info("Problem connecting to server: " + addr);
} }

View File

@ -700,11 +700,11 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
LOG.debug("Telling master at " + LOG.debug("Telling master at " +
conf.get(MASTER_ADDRESS) + " that we are up"); conf.get(MASTER_ADDRESS) + " that we are up");
} }
// Do initial RPC setup. // Do initial RPC setup. The final argument indicates that the RPC should retry indefinitely.
this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy( this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy(
HMasterRegionInterface.class, HMasterRegionInterface.versionID, HMasterRegionInterface.class, HMasterRegionInterface.versionID,
new HServerAddress(conf.get(MASTER_ADDRESS)).getInetSocketAddress(), new HServerAddress(conf.get(MASTER_ADDRESS)).getInetSocketAddress(),
this.conf); this.conf, -1);
MapWritable result = null; MapWritable result = null;
long lastMsg = 0; long lastMsg = 0;
while(!stopRequested.get()) { while(!stopRequested.get()) {