HBASE-3074 Zookeeper test failing on hudson

HConnections go stale if the miniCluster is shut down on them.  Make it
so that we create new HConnections on the other side of a new cluster
spin-up.  Stale connections were the cause of the last few test failures.

M src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
  If you shut down a cluster while holding HConnections that were
  made using the HTU's Configuration, they will all be stale --
  session expired -- and of no use for connecting to the new cluster
  that comes up, now that an HConnection is first-class dependent on
  its own ZooKeeperWatcher instance.
  So, I went through this class and fixed it up so that we use new
  Configurations wherever a method could be called on either side of a
  mini cluster restart.  Added a note to getConfiguration to be careful
  what you do with it, because the Configuration determines which
  HConnection you get.
M src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
  Don't use same Configuration everywhere.
M src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
  Make new Configuration when we make new Cluster so we get fresh
  HConnection -- not one w/ stale data, disconnected.
M src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
  Check that a new server that comes in on a regionServerReport
  doesn't have the same port and host as one we have already registered,
  as we do in the reportForDuty call (this is a preemptive bug fix).
M src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
  Save off an identifier for HCM and use it in log messages, particularly
  when we close -- helps debugging.
M src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
  getConfiguration added.


git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1005714 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-10-08 06:04:35 +00:00
parent d65d011907
commit ff2d19229f
7 changed files with 106 additions and 75 deletions

View File

@ -1030,6 +1030,13 @@ public class HBaseAdmin implements Abortable {
HRegionInfo.createRegionName(tableName, null, HConstants.NINES, false));
}
/**
* Gives callers access to the configuration held by this admin.
*
* @return the {@link Configuration} stored in this instance's
* <code>conf</code> field; the same object, not a copy.
*/
public Configuration getConfiguration() {
return conf;
}
/**
* Check to see if HBase is running. Throw an exception if not.
*

View File

@ -209,6 +209,7 @@ public class HConnectionManager {
new ConcurrentHashMap<String, HRegionInterface>();
private final RootRegionTracker rootRegionTracker;
private final String identifier;
/**
* Map of table to table {@link HRegionLocation}s. The table key is made
@ -259,6 +260,7 @@ public class HConnectionManager {
// initialize zookeeper and master address manager
this.zooKeeper = getZooKeeperWatcher();
this.identifier = this.zooKeeper.toString();
masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
zooKeeper.registerListener(masterAddressTracker);
masterAddressTracker.start();
@ -272,8 +274,7 @@ public class HConnectionManager {
@Override
public String toString() {
// Return our zk identifier ... it 'hconnection + zk sessionid'.
return this.zooKeeper.toString();
return this.identifier;
}
private long getPauseTime(int tries) {
@ -516,7 +517,7 @@ public class HConnectionManager {
private HRegionLocation locateRegion(final byte [] tableName,
final byte [] row, boolean useCache)
throws IOException {
if (this.closed) throw new IOException("closed");
if (this.closed) throw new IOException(toString() + " closed");
if (tableName == null || tableName.length == 0) {
throw new IllegalArgumentException(
"table name cannot be null or zero length");

View File

@ -153,21 +153,32 @@ public class ServerManager {
// in, it should have been removed from serverAddressToServerInfo and queued
// for processing by ProcessServerShutdown.
HServerInfo info = new HServerInfo(serverInfo);
String hostAndPort = info.getServerAddress().toString();
checkIsDead(info.getServerName(), "STARTUP");
checkAlreadySameHostPort(info);
recordNewServer(info, false, null);
}
/**
* Test to see if we have a server of same host and port already.
* @param serverInfo
* @throws PleaseHoldException
*/
void checkAlreadySameHostPort(final HServerInfo serverInfo)
throws PleaseHoldException {
String hostAndPort = serverInfo.getServerAddress().toString();
HServerInfo existingServer =
haveServerWithSameHostAndPortAlready(info.getHostnamePort());
haveServerWithSameHostAndPortAlready(serverInfo.getHostnamePort());
if (existingServer != null) {
String message = "Server start rejected; we already have " + hostAndPort +
" registered; existingServer=" + existingServer + ", newServer=" + info;
" registered; existingServer=" + existingServer + ", newServer=" + serverInfo;
LOG.info(message);
if (existingServer.getStartCode() < info.getStartCode()) {
LOG.info("Triggering server recovery; existingServer looks stale");
if (existingServer.getStartCode() < serverInfo.getStartCode()) {
LOG.info("Triggering server recovery; existingServer " +
existingServer.getServerName() + " looks stale");
expireServer(existingServer);
}
throw new PleaseHoldException(message);
}
checkIsDead(info.getServerName(), "STARTUP");
recordNewServer(info, false, null);
}
private HServerInfo haveServerWithSameHostAndPortAlready(final String hostnamePort) {
@ -248,25 +259,24 @@ public class ServerManager {
// If we don't know this server, tell it shutdown.
HServerInfo storedInfo = this.onlineServers.get(info.getServerName());
if (storedInfo == null) {
if (this.deadservers.contains(storedInfo)) {
LOG.warn("Report from deadserver " + storedInfo);
return HMsg.STOP_REGIONSERVER_ARRAY;
} else {
// Just let the server in. Presume master joining a running cluster.
// recordNewServer is what happens at the end of reportServerStartup.
// The only thing we are skipping is passing back to the regionserver
// the HServerInfo to use. Here we presume a master has already done
// that so we'll press on with whatever it gave us for HSI.
recordNewServer(info, true, null);
// If msgs, put off their processing but this is not enough because
// its possible that the next time the server reports in, we'll still
// not be up and serving. For example, if a split, we'll need the
// regions and servers setup in the master before the below
// handleSplitReport will work. TODO: FIx!!
if (msgs.length > 0) throw new PleaseHoldException("FIX! Putting off " +
// Maybe we already have this host+port combo and its just different
// start code?
checkAlreadySameHostPort(info);
// Just let the server in. Presume master joining a running cluster.
// recordNewServer is what happens at the end of reportServerStartup.
// The only thing we are skipping is passing back to the regionserver
// the HServerInfo to use. Here we presume a master has already done
// that so we'll press on with whatever it gave us for HSI.
recordNewServer(info, true, null);
// If msgs, put off their processing but this is not enough because
// its possible that the next time the server reports in, we'll still
// not be up and serving. For example, if a split, we'll need the
// regions and servers setup in the master before the below
// handleSplitReport will work. TODO: FIx!!
if (msgs.length > 0)
throw new PleaseHoldException("FIX! Putting off " +
"message processing because not yet rwady but possible we won't be " +
"ready next on next report");
}
}
// Check startcodes

View File

@ -83,14 +83,13 @@ import com.google.common.base.Preconditions;
*/
public class HBaseTestingUtility {
private final static Log LOG = LogFactory.getLog(HBaseTestingUtility.class);
private final Configuration conf;
private Configuration conf;
private MiniZooKeeperCluster zkCluster = null;
private MiniDFSCluster dfsCluster = null;
private MiniHBaseCluster hbaseCluster = null;
private MiniMRCluster mrCluster = null;
// If non-null, then already a cluster running.
private File clusterTestBuildDir = null;
private HBaseAdmin hbaseAdmin = null;
/**
* System property key to get test directory value.
@ -112,6 +111,14 @@ public class HBaseTestingUtility {
}
/**
* Returns this class's instance of {@link Configuration}. Be careful how
* you use the returned Configuration since {@link HConnection} instances
* can be shared. The Map of HConnections is keyed by the Configuration. If,
* say, a Connection was being used against a cluster that had been shut down,
* see {@link #shutdownMiniCluster()}, then the Connection will no longer
* be wholesome. Rather than use the returned instance directly, it's usually
* best to make a copy and use that. Do
* <code>Configuration c = new Configuration(INSTANCE.getConfiguration());</code>
* @return Instance of Configuration.
*/
public Configuration getConfiguration() {
@ -340,9 +347,10 @@ public class HBaseTestingUtility {
this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
fs.mkdirs(hbaseRootdir);
FSUtils.setVersion(fs, hbaseRootdir);
this.hbaseCluster = new MiniHBaseCluster(this.conf, numMasters, numSlaves);
Configuration c = new Configuration(this.conf);
this.hbaseCluster = new MiniHBaseCluster(c, numMasters, numSlaves);
// Don't leave here till we've done a successful scan of the .META.
HTable t = new HTable(this.conf, HConstants.META_TABLE_NAME);
HTable t = new HTable(c, HConstants.META_TABLE_NAME);
ResultScanner s = t.getScanner(new Scan());
while (s.next() != null) {
continue;
@ -360,7 +368,7 @@ public class HBaseTestingUtility {
public void restartHBaseCluster(int servers) throws IOException {
this.hbaseCluster = new MiniHBaseCluster(this.conf, servers);
// Don't leave here till we've done a successful scan of the .META.
HTable t = new HTable(this.conf, HConstants.META_TABLE_NAME);
HTable t = new HTable(new Configuration(this.conf), HConstants.META_TABLE_NAME);
ResultScanner s = t.getScanner(new Scan());
while (s.next() != null) {
continue;
@ -447,8 +455,8 @@ public class HBaseTestingUtility {
for(byte[] family : families) {
desc.addFamily(new HColumnDescriptor(family));
}
(new HBaseAdmin(getConfiguration())).createTable(desc);
return new HTable(getConfiguration(), tableName);
getHBaseAdmin().createTable(desc);
return new HTable(new Configuration(getConfiguration()), tableName);
}
/**
@ -486,8 +494,8 @@ public class HBaseTestingUtility {
HColumnDescriptor.DEFAULT_REPLICATION_SCOPE);
desc.addFamily(hcd);
}
(new HBaseAdmin(getConfiguration())).createTable(desc);
return new HTable(getConfiguration(), tableName);
getHBaseAdmin().createTable(desc);
return new HTable(new Configuration(getConfiguration()), tableName);
}
/**
@ -514,8 +522,8 @@ public class HBaseTestingUtility {
desc.addFamily(hcd);
i++;
}
(new HBaseAdmin(getConfiguration())).createTable(desc);
return new HTable(getConfiguration(), tableName);
getHBaseAdmin().createTable(desc);
return new HTable(new Configuration(getConfiguration()), tableName);
}
/**
@ -711,7 +719,8 @@ public class HBaseTestingUtility {
* @throws IOException When reading the rows fails.
*/
public List<byte[]> getMetaTableRows() throws IOException {
HTable t = new HTable(this.conf, HConstants.META_TABLE_NAME);
// TODO: Redo using MetaReader class
HTable t = new HTable(new Configuration(this.conf), HConstants.META_TABLE_NAME);
List<byte[]> rows = new ArrayList<byte[]>();
ResultScanner s = t.getScanner(new Scan());
for (Result result : s) {
@ -729,7 +738,8 @@ public class HBaseTestingUtility {
* @throws IOException When reading the rows fails.
*/
public List<byte[]> getMetaTableRows(byte[] tableName) throws IOException {
HTable t = new HTable(this.conf, HConstants.META_TABLE_NAME);
// TODO: Redo using MetaReader.
HTable t = new HTable(new Configuration(this.conf), HConstants.META_TABLE_NAME);
List<byte[]> rows = new ArrayList<byte[]>();
ResultScanner s = t.getScanner(new Scan());
for (Result result : s) {
@ -843,22 +853,23 @@ public class HBaseTestingUtility {
public void expireSession(ZooKeeperWatcher nodeZK, Server server)
throws Exception {
String quorumServers = ZKConfig.getZKQuorumServersString(conf);
Configuration c = new Configuration(this.conf);
String quorumServers = ZKConfig.getZKQuorumServersString(c);
int sessionTimeout = 5 * 1000; // 5 seconds
ZooKeeper zk = nodeZK.getZooKeeper();
byte[] password = zk.getSessionPasswd();
long sessionID = zk.getSessionId();
byte[] password = nodeZK.getZooKeeper().getSessionPasswd();
long sessionID = nodeZK.getZooKeeper().getSessionId();
ZooKeeper zk = new ZooKeeper(quorumServers,
ZooKeeper newZK = new ZooKeeper(quorumServers,
sessionTimeout, EmptyWatcher.instance, sessionID, password);
zk.close();
newZK.close();
final long sleep = sessionTimeout * 5L;
LOG.info("ZK Closed Session 0x" + Long.toHexString(sessionID) +
"; sleeping=" + sleep);
Thread.sleep(sleep);
new HTable(conf, HConstants.META_TABLE_NAME);
new HTable(new Configuration(conf), HConstants.META_TABLE_NAME);
}
/**
@ -878,10 +889,7 @@ public class HBaseTestingUtility {
*/
public HBaseAdmin getHBaseAdmin()
throws IOException {
if (hbaseAdmin == null) {
hbaseAdmin = new HBaseAdmin(getConfiguration());
}
return hbaseAdmin;
return new HBaseAdmin(new Configuration(getConfiguration()));
}
/**
@ -976,7 +984,7 @@ public class HBaseTestingUtility {
public void waitTableAvailable(byte[] table, long timeoutMillis)
throws InterruptedException, IOException {
HBaseAdmin admin = new HBaseAdmin(conf);
HBaseAdmin admin = getHBaseAdmin();
long startWait = System.currentTimeMillis();
while (!admin.isTableAvailable(table)) {
assertTrue("Timed out waiting for table " + Bytes.toStringBinary(table),

View File

@ -40,10 +40,8 @@ import org.apache.hadoop.hbase.zookeeper.ZKConfig;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.ZooKeeper.States;
import org.apache.zookeeper.proto.WatcherEvent;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
@ -56,8 +54,6 @@ public class TestZooKeeper {
private final static HBaseTestingUtility
TEST_UTIL = new HBaseTestingUtility();
private static Configuration conf;
/**
* @throws java.lang.Exception
*/
@ -67,7 +63,6 @@ public class TestZooKeeper {
TEST_UTIL.startMiniZKCluster();
TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
TEST_UTIL.startMiniCluster(2);
conf = TEST_UTIL.getConfiguration();
}
/**
@ -93,8 +88,9 @@ public class TestZooKeeper {
*/
@Test
public void testClientSessionExpired()
throws IOException, InterruptedException {
Configuration c = new Configuration(this.conf);
throws IOException, InterruptedException {
LOG.info("testClientSessionExpired");
Configuration c = new Configuration(TEST_UTIL.getConfiguration());
new HTable(c, HConstants.META_TABLE_NAME);
String quorumServers = ZKConfig.getZKQuorumServersString(c);
int sessionTimeout = 5 * 1000; // 5 seconds
@ -102,7 +98,6 @@ public class TestZooKeeper {
ZooKeeperWatcher connectionZK = connection.getZooKeeperWatcher();
long sessionID = connectionZK.getZooKeeper().getSessionId();
byte[] password = connectionZK.getZooKeeper().getSessionPasswd();
ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout,
EmptyWatcher.instance, sessionID, password);
zk.close();
@ -110,14 +105,13 @@ public class TestZooKeeper {
Thread.sleep(sessionTimeout * 3L);
System.err.println("ZooKeeper should have timed out");
Assert.assertTrue(connection.getZooKeeperWatcher().getZooKeeper().
getState().equals(States.CLOSED));
LOG.info("state=" + connectionZK.getZooKeeper().getState());
Assert.assertTrue(connectionZK.getZooKeeper().getState().equals(States.CLOSED));
}
@Test
public void testRegionServerSessionExpired() throws Exception {
LOG.info("Starting testRegionServerSessionExpired");
new HTable(conf, HConstants.META_TABLE_NAME);
int metaIndex = TEST_UTIL.getMiniHBaseCluster().getServerWithMeta();
TEST_UTIL.expireRegionServerSession(metaIndex);
testSanity();
@ -126,7 +120,6 @@ public class TestZooKeeper {
//@Test
public void disabledTestMasterSessionExpired() throws Exception {
LOG.info("Starting testMasterSessionExpired");
new HTable(conf, HConstants.META_TABLE_NAME);
TEST_UTIL.expireMasterSession();
testSanity();
}
@ -136,7 +129,8 @@ public class TestZooKeeper {
* @throws Exception
*/
public void testSanity() throws Exception{
HBaseAdmin admin = new HBaseAdmin(conf);
HBaseAdmin admin =
new HBaseAdmin(new Configuration(TEST_UTIL.getConfiguration()));
String tableName = "test"+System.currentTimeMillis();
HTableDescriptor desc = new HTableDescriptor(tableName);
HColumnDescriptor family = new HColumnDescriptor("fam");
@ -144,7 +138,8 @@ public class TestZooKeeper {
LOG.info("Creating table " + tableName);
admin.createTable(desc);
HTable table = new HTable(conf, tableName);
HTable table =
new HTable(new Configuration(TEST_UTIL.getConfiguration()), tableName);
Put put = new Put(Bytes.toBytes("testrow"));
put.add(Bytes.toBytes("fam"),
Bytes.toBytes("col"), Bytes.toBytes("testdata"));
@ -156,8 +151,9 @@ public class TestZooKeeper {
@Test
public void testMultipleZK() {
try {
HTable localMeta = new HTable(conf, HConstants.META_TABLE_NAME);
Configuration otherConf = HBaseConfiguration.create(conf);
HTable localMeta =
new HTable(new Configuration(TEST_UTIL.getConfiguration()), HConstants.META_TABLE_NAME);
Configuration otherConf = new Configuration(TEST_UTIL.getConfiguration());
otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1");
HTable ipMeta = new HTable(otherConf, HConstants.META_TABLE_NAME);
@ -166,9 +162,9 @@ public class TestZooKeeper {
ipMeta.exists(new Get(HConstants.LAST_ROW));
// make sure they aren't the same
assertFalse(HConnectionManager.getConnection(conf).getZooKeeperWatcher()
assertFalse(HConnectionManager.getConnection(localMeta.getConfiguration()).getZooKeeperWatcher()
== HConnectionManager.getConnection(otherConf).getZooKeeperWatcher());
assertFalse(HConnectionManager.getConnection(conf)
assertFalse(HConnectionManager.getConnection(localMeta.getConfiguration())
.getZooKeeperWatcher().getQuorum().equals(HConnectionManager
.getConnection(otherConf).getZooKeeperWatcher().getQuorum()));
} catch (Exception e) {
@ -184,8 +180,9 @@ public class TestZooKeeper {
*/
@Test
public void testZNodeDeletes() throws Exception {
ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
TestZooKeeper.class.getName(), null);
ZooKeeperWatcher zkw = new ZooKeeperWatcher(
new Configuration(TEST_UTIL.getConfiguration()),
TestZooKeeper.class.getName(), null);
ZKUtil.createWithParents(zkw, "/l1/l2/l3/l4");
try {
ZKUtil.deleteNode(zkw, "/l1/l2");

View File

@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
@ -91,8 +92,10 @@ public class TestMetaReaderEditor {
assertFalse(MetaReader.tableExists(CT, name));
UTIL.createTable(nameBytes, HConstants.CATALOG_FAMILY);
assertTrue(MetaReader.tableExists(CT, name));
UTIL.getHBaseAdmin().disableTable(name);
UTIL.getHBaseAdmin().deleteTable(name);
HBaseAdmin admin = UTIL.getHBaseAdmin();
admin.disableTable(name);
admin.deleteTable(name);
admin.close();
assertFalse(MetaReader.tableExists(CT, name));
assertTrue(MetaReader.tableExists(CT,
Bytes.toString(HConstants.META_TABLE_NAME)));

View File

@ -27,6 +27,7 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableExistsException;
@ -113,7 +114,11 @@ public class TestRestartCluster {
LOG.info("\n\nStarting cluster the second time");
UTIL.restartHBaseCluster(3);
allRegions = MetaScanner.listAllRegions(UTIL.getConfiguration());
// Need to use a new 'Configuration' so we make a new HConnection.
// Otherwise we're reusing an HConnection that has gone stale because
// the shutdown of the cluster also shut down the connection.
allRegions = MetaScanner.
listAllRegions(new Configuration(UTIL.getConfiguration()));
assertEquals(3, allRegions.size());
LOG.info("\n\nWaiting for tables to be available");