diff --git a/CHANGES.txt b/CHANGES.txt
index 83b6635c22c..f5e1c3b6253 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -423,6 +423,8 @@ Release 0.21.0 - Unreleased
no progress running YCSB on clean cluster startup
HBASE-2785 TestScannerTimeout.test2772 is flaky
HBASE-2787 PE is confused about flushCommits
+ HBASE-2707 Can't recover from a dead ROOT server if any exceptions happens
+ during log splitting
IMPROVEMENTS
HBASE-1760 Cleanup TODOs in HTable
diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml
index b895fc57406..bf772f0977e 100644
--- a/src/assembly/bin.xml
+++ b/src/assembly/bin.xml
@@ -28,6 +28,7 @@
/
hbase-${project.version}.jar
+ hbase-${project.version}-tests.jar
diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 5367638f171..04bc95ecf78 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -504,8 +504,7 @@ public class HMaster extends Thread implements HMasterInterface,
this.serverManager.getServersToServerInfo().keySet().toString());
}
}
- final HServerAddress root = this.regionManager.getRootRegionLocation();
- switch (this.regionServerOperationQueue.process(root)) {
+ switch (this.regionServerOperationQueue.process()) {
case FAILED:
// If FAILED op processing, bad. Exit.
break FINISHED;
diff --git a/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java b/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
index 7340cfeb49e..b4ba5ab15c0 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
@@ -19,32 +19,6 @@
*/
package org.apache.hadoop.hbase.master;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HMsg;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerAddress;
-import org.apache.hadoop.hbase.HServerInfo;
-import org.apache.hadoop.hbase.HServerLoad;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.executor.RegionTransitionEventData;
-import org.apache.hadoop.hbase.executor.HBaseEventHandler;
-import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
-import org.apache.hadoop.hbase.ipc.HRegionInterface;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.wal.HLog;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.Threads;
-import org.apache.hadoop.hbase.util.Writables;
-import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
-import org.apache.hadoop.io.WritableUtils;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@@ -61,6 +35,29 @@ import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HMsg;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HServerInfo;
+import org.apache.hadoop.hbase.HServerLoad;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.executor.RegionTransitionEventData;
+import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType;
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
+
/**
* Class to manage assigning regions to servers, state of root and meta, etc.
*/
diff --git a/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java b/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
index 10f9dbd67c5..d1283706161 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
@@ -111,17 +111,16 @@ public class RegionServerOperationQueue {
/**
* Try to get an operation off of the queue and process it.
- * @param rootRegionLocation Location of the root region.
* @return {@link ProcessingResultCode#PROCESSED},
* {@link ProcessingResultCode#REQUEUED},
* {@link ProcessingResultCode#REQUEUED_BUT_PROBLEM}
*/
- public synchronized ProcessingResultCode process(final HServerAddress rootRegionLocation) {
+ public synchronized ProcessingResultCode process() {
RegionServerOperation op = null;
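- // Only process the delayed queue if root region is online. If offline,
- // the operation to put it online is probably in the toDoQueue. Process
- // it first.
+ // Only process the delayed queue once the toDoQueue has been drained. If
+ // root is offline, the operation to put it online is probably in the
+ // toDoQueue, so whatever is queued there gets processed first.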
- if (rootRegionLocation != null) {
+ if (toDoQueue.isEmpty()) {
op = delayedToDoQueue.poll();
}
if (op == null) {
diff --git a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 0e69709dd5e..d865e385276 100644
--- a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
import org.apache.hadoop.hdfs.MiniDFSCluster;
@@ -920,4 +921,41 @@ public class HBaseTestingUtility {
LOG.info("Could not set max recovery field", e);
}
}
-}
+
+  /**
+   * Wait until <code>countOfRegions</code> rows in .META. have a non-empty
+   * info:server. This means all regions have been deployed, master has been
+   * informed and updated .META. with the regions deployed server.
+   * Rescans .META. in a loop with no upper bound on the wait, so callers
+   * should run under a test timeout.
+   * @param countOfRegions How many regions in .META.
+   * @throws IOException if opening or scanning .META. fails
+   */
+  public void waitUntilAllRegionsAssigned(final int countOfRegions)
+  throws IOException {
+    HTable meta = new HTable(getConfiguration(), HConstants.META_TABLE_NAME);
+    while (true) {
+      int rows = 0;
+      Scan scan = new Scan();
+      scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+      ResultScanner s = meta.getScanner(scan);
+      try {
+        for (Result r = null; (r = s.next()) != null;) {
+          byte [] b =
+            r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+          // First row without a server means assignment is still in flight;
+          // stop counting and retry after the sleep below.
+          if (b == null || b.length <= 0) break;
+          rows++;
+        }
+      } finally {
+        // Always release the scanner, even when next() throws.
+        s.close();
+      }
+      // If I get to here and all rows have a Server, then all have been assigned.
+      if (rows == countOfRegions) break;
+      LOG.info("Found=" + rows);
+      Threads.sleep(1000);
+    }
+  }
+}
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java b/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java
index e59bdf76777..8848b3df605 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java
@@ -84,7 +84,7 @@ public class TestMasterTransitions {
TEST_UTIL.createTable(Bytes.toBytes(TABLENAME), FAMILIES);
HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME);
int countOfRegions = TEST_UTIL.createMultiRegions(t, getTestFamily());
- waitUntilAllRegionsAssigned(countOfRegions);
+ TEST_UTIL.waitUntilAllRegionsAssigned(countOfRegions);
addToEachStartKey(countOfRegions);
}
@@ -456,36 +456,6 @@ public class TestMasterTransitions {
assertTrue((t.get(g)).size() > 0);
}
- /*
- * Wait until all rows in .META. have a non-empty info:server. This means
- * all regions have been deployed, master has been informed and updated
- * .META. with the regions deployed server.
- * @param countOfRegions How many regions in .META.
- * @throws IOException
- */
- private static void waitUntilAllRegionsAssigned(final int countOfRegions)
- throws IOException {
- HTable meta = new HTable(TEST_UTIL.getConfiguration(),
- HConstants.META_TABLE_NAME);
- while (true) {
- int rows = 0;
- Scan scan = new Scan();
- scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
- ResultScanner s = meta.getScanner(scan);
- for (Result r = null; (r = s.next()) != null;) {
- byte [] b =
- r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
- if (b == null || b.length <= 0) break;
- rows++;
- }
- s.close();
- // If I get to here and all rows have a Server, then all have been assigned.
- if (rows == countOfRegions) break;
- LOG.info("Found=" + rows);
- Threads.sleep(1000);
- }
- }
-
/*
* @return Count of regions in meta table.
* @throws IOException
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java b/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java
new file mode 100644
index 00000000000..0125134221f
--- /dev/null
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java
@@ -0,0 +1,204 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArraySet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HMsg;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HServerInfo;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Threads;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test issues assigning ROOT.
+ */
+public class TestROOTAssignment {
+  private static final Log LOG = LogFactory.getLog(TestROOTAssignment.class);
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final byte [] TABLENAME = Bytes.toBytes("root_assignments");
+  private static final byte [][] FAMILIES =
+    new byte [][] {Bytes.toBytes("family")};
+
+  /**
+   * Start up a mini cluster and put a small table of many empty regions into it.
+   * @throws Exception
+   */
+  @BeforeClass public static void beforeAllTests() throws Exception {
+    TEST_UTIL.getConfiguration().setInt("hbase.regions.percheckin", 2);
+    // Start a cluster of two regionservers.
+    TEST_UTIL.startMiniCluster(2);
+    // Create a table with a single family. This will assign a region.
+    TEST_UTIL.createTable(TABLENAME, FAMILIES);
+    HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME);
+    int countOfRegions = TEST_UTIL.createMultiRegions(t, FAMILIES[0]);
+    TEST_UTIL.waitUntilAllRegionsAssigned(countOfRegions);
+    HTable table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME);
+    TEST_UTIL.loadTable(table, FAMILIES[0]);
+    table.close();
+  }
+
+  /**
+   * Shut down the mini cluster started in {@link #beforeAllTests()}.
+   * @throws IOException
+   */
+  @AfterClass public static void afterAllTests() throws IOException {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Before each test, ask the testing utility to make sure regionservers are
+   * available (requested count: 2).
+   * @throws IOException
+   */
+  @Before public void setup() throws IOException {
+    TEST_UTIL.ensureSomeRegionServersAvailable(2);
+  }
+
+  /**
+   * Interrupt processing of server shutdown so it gets put on delay queue.
+   * The listener is registered on the master's operation queue and its
+   * callbacks are not invoked by the test thread, which only polls
+   * {@link #isDone()}; state shared across threads is kept thread-safe.
+   */
+  static class PostponeShutdownProcessing implements RegionServerOperationListener {
+    // Set of what we've delayed so we don't do repeated delays.
+    private final Set<RegionServerOperation> postponed =
+      new CopyOnWriteArraySet<RegionServerOperation>();
+    // Set in processed() and polled from the test thread via isDone();
+    // volatile so the update is guaranteed to become visible to the poller.
+    private volatile boolean done = false;
+    private final HServerAddress rootServerAddress;
+    private final HMaster master;
+
+    /**
+     * @param master Master whose root region location is checked when postponing.
+     * @param rootServerAddress Address of the server whose shutdown to postpone.
+     */
+    PostponeShutdownProcessing(final HMaster master,
+        final HServerAddress rootServerAddress) {
+      this.master = master;
+      this.rootServerAddress = rootServerAddress;
+    }
+
+    /**
+     * Postpones, once, the shutdown processing of the root server.
+     * @param op Operation about to be processed.
+     * @return false the first time the root server's shutdown is seen (the op
+     * then goes to the delayed queue), true in all other cases.
+     * @throws IOException
+     */
+    @Override
+    public boolean process(final RegionServerOperation op) throws IOException {
+      // If this is a regionserver shutdown and it is for the root server, then
+      // we want to delay the processing of the shutdown.
+      boolean result = true;
+      if (op instanceof ProcessServerShutdown) {
+        ProcessServerShutdown pss = (ProcessServerShutdown)op;
+        if (pss.getDeadServerAddress().equals(this.rootServerAddress)) {
+          // Don't postpone more than once: add() returns false if already seen.
+          if (this.postponed.add(pss)) {
+            Assert.assertNull(this.master.getRegionManager().getRootRegionLocation());
+            pss.setDelay(1 * 1000);
+            // Return false. This will add this op to the delayed queue.
+            result = false;
+          }
+        }
+      }
+      return result;
+    }
+
+    /**
+     * Ignores both arguments.
+     * @return Always true.
+     */
+    @Override
+    public boolean process(HServerInfo serverInfo, HMsg incomingMsg) {
+      return true;
+    }
+
+    /**
+     * Flags completion once the root server's shutdown has been processed.
+     * @param op Operation that has just been processed.
+     */
+    @Override
+    public void processed(RegionServerOperation op) {
+      if (op instanceof ProcessServerShutdown) {
+        ProcessServerShutdown pss = (ProcessServerShutdown)op;
+        if (pss.getDeadServerAddress().equals(this.rootServerAddress)) {
+          this.done = true;
+        }
+      }
+    }
+
+    /**
+     * @return True once the root server's shutdown has been reported processed.
+     */
+    public boolean isDone() {
+      return this.done;
+    }
+  }
+
+  /**
+   * If the split of the log for the regionserver hosting ROOT doesn't go off
+   * smoothly, if the process server shutdown gets added to the delayed queue
+   * of events to process, then ROOT was not being allocated, ever.
+   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2707">HBASE-2707</a>
+   */
+  @Test (timeout=300000) public void testROOTDeployedThoughProblemSplittingLog()
+  throws Exception {
+    LOG.info("Running testROOTDeployedThoughProblemSplittingLog");
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    final HMaster master = cluster.getMaster();
+    byte [] rootRegion = Bytes.toBytes("-ROOT-,,0");
+    int rootIndex = cluster.getServerWith(rootRegion);
+    final HRegionServer rootHRS = cluster.getRegionServer(rootIndex);
+
+    // Add our RegionServerOperationsListener
+    PostponeShutdownProcessing listener = new PostponeShutdownProcessing(master,
+      rootHRS.getHServerInfo().getServerAddress());
+    master.getRegionServerOperationQueue().
+      registerRegionServerOperationListener(listener);
+    try {
+      // Now abort the server carrying ROOT.
+      cluster.abortRegionServer(rootIndex);
+
+      // Wait for processing of the shutdown server.
+      while(!listener.isDone()) Threads.sleep(100);
+      master.getRegionManager().waitForRootRegionLocation();
+    } finally {
+      master.getRegionServerOperationQueue().
+        unregisterRegionServerOperationListener(listener);
+    }
+  }
+}