diff --git a/CHANGES.txt b/CHANGES.txt index 83b6635c22c..f5e1c3b6253 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -423,6 +423,8 @@ Release 0.21.0 - Unreleased no progress running YCSB on clean cluster startup HBASE-2785 TestScannerTimeout.test2772 is flaky HBASE-2787 PE is confused about flushCommits + HBASE-2707 Can't recover from a dead ROOT server if any exceptions happens + during log splitting IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml index b895fc57406..bf772f0977e 100644 --- a/src/assembly/bin.xml +++ b/src/assembly/bin.xml @@ -28,6 +28,7 @@ / hbase-${project.version}.jar + hbase-${project.version}-tests.jar diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 5367638f171..04bc95ecf78 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -504,8 +504,7 @@ public class HMaster extends Thread implements HMasterInterface, this.serverManager.getServersToServerInfo().keySet().toString()); } } - final HServerAddress root = this.regionManager.getRootRegionLocation(); - switch (this.regionServerOperationQueue.process(root)) { + switch (this.regionServerOperationQueue.process()) { case FAILED: // If FAILED op processing, bad. Exit. 
break FINISHED; diff --git a/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java b/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java index 7340cfeb49e..b4ba5ab15c0 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java @@ -19,32 +19,6 @@ */ package org.apache.hadoop.hbase.master; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HMsg; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; -import org.apache.hadoop.hbase.HServerLoad; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.executor.RegionTransitionEventData; -import org.apache.hadoop.hbase.executor.HBaseEventHandler; -import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType; -import org.apache.hadoop.hbase.ipc.HRegionInterface; -import org.apache.hadoop.hbase.regionserver.HRegion; -import org.apache.hadoop.hbase.regionserver.wal.HLog; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.Pair; -import org.apache.hadoop.hbase.util.Threads; -import org.apache.hadoop.hbase.util.Writables; -import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper; -import org.apache.hadoop.io.WritableUtils; - import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -61,6 +35,29 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import 
org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HMsg; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.HServerLoad; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.executor.RegionTransitionEventData; +import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType; +import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper; + /** * Class to manage assigning regions to servers, state of root and meta, etc. */ diff --git a/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java b/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java index 10f9dbd67c5..d1283706161 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java +++ b/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java @@ -111,17 +111,16 @@ public class RegionServerOperationQueue { /** * Try to get an operation off of the queue and process it. - * @param rootRegionLocation Location of the root region. * @return {@link ProcessingResultCode#PROCESSED}, * {@link ProcessingResultCode#REQUEUED}, * {@link ProcessingResultCode#REQUEUED_BUT_PROBLEM} */ - public synchronized ProcessingResultCode process(final HServerAddress rootRegionLocation) { + public synchronized ProcessingResultCode process() { RegionServerOperation op = null; // Only process the delayed queue if root region is online. 
If offline, // the operation to put it online is probably in the toDoQueue. Process // it first. - if (rootRegionLocation != null) { + if (toDoQueue.isEmpty()) { op = delayedToDoQueue.poll(); } if (op == null) { diff --git a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 0e69709dd5e..d865e385276 100644 --- a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -920,4 +921,35 @@ public class HBaseTestingUtility { LOG.info("Could not set max recovery field", e); } } -} + + + /** + * Wait until countOfRegions rows in .META. have a non-empty + * info:server. This means all regions have been deployed, master has been + * informed and updated .META. with the regions deployed server. + * Rescans .META. after each Threads.sleep(1000) and loops until the count matches. + * @param countOfRegions How many rows in .META. must carry a server before returning. + * @throws IOException + */ + public void waitUntilAllRegionsAssigned(final int countOfRegions) + throws IOException { + HTable meta = new HTable(getConfiguration(), HConstants.META_TABLE_NAME); + while (true) { + int rows = 0; + Scan scan = new Scan(); + scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); + ResultScanner s = meta.getScanner(scan); + for (Result r = null; (r = s.next()) != null;) { + byte [] b = + r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); + if (b == null || b.length <= 0) break; + rows++; + } + s.close(); + // If I get to here and all rows have a Server, then all have been assigned. 
+ if (rows == countOfRegions) break; + LOG.info("Found=" + rows); + Threads.sleep(1000); + } + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java b/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java index e59bdf76777..8848b3df605 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java @@ -84,7 +84,7 @@ public class TestMasterTransitions { TEST_UTIL.createTable(Bytes.toBytes(TABLENAME), FAMILIES); HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); int countOfRegions = TEST_UTIL.createMultiRegions(t, getTestFamily()); - waitUntilAllRegionsAssigned(countOfRegions); + TEST_UTIL.waitUntilAllRegionsAssigned(countOfRegions); addToEachStartKey(countOfRegions); } @@ -456,36 +456,6 @@ public class TestMasterTransitions { assertTrue((t.get(g)).size() > 0); } - /* - * Wait until all rows in .META. have a non-empty info:server. This means - * all regions have been deployed, master has been informed and updated - * .META. with the regions deployed server. - * @param countOfRegions How many regions in .META. - * @throws IOException - */ - private static void waitUntilAllRegionsAssigned(final int countOfRegions) - throws IOException { - HTable meta = new HTable(TEST_UTIL.getConfiguration(), - HConstants.META_TABLE_NAME); - while (true) { - int rows = 0; - Scan scan = new Scan(); - scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); - ResultScanner s = meta.getScanner(scan); - for (Result r = null; (r = s.next()) != null;) { - byte [] b = - r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); - if (b == null || b.length <= 0) break; - rows++; - } - s.close(); - // If I get to here and all rows have a Server, then all have been assigned. 
- if (rows == countOfRegions) break; - LOG.info("Found=" + rows); - Threads.sleep(1000); - } - } - /* * @return Count of regions in meta table. * @throws IOException diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java b/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java new file mode 100644 index 00000000000..0125134221f --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/master/TestROOTAssignment.java @@ -0,0 +1,169 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master; + +import java.io.IOException; +import java.util.Set; +import java.util.concurrent.CopyOnWriteArraySet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HMsg; +import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Threads; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Test issues assigning ROOT. + */ +public class TestROOTAssignment { + private static final Log LOG = LogFactory.getLog(TestROOTAssignment.class); + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final byte [] TABLENAME = Bytes.toBytes("root_assignments"); + private static final byte [][] FAMILIES = + new byte [][] {Bytes.toBytes("family")}; + + /** + * Start up a two-regionserver mini cluster and put a small multi-region table into it. + * @throws Exception + */ + @BeforeClass public static void beforeAllTests() throws Exception { + TEST_UTIL.getConfiguration().setInt("hbase.regions.percheckin", 2); + // Start a cluster of two regionservers. + TEST_UTIL.startMiniCluster(2); + // Create a table of one family. This will assign a region. 
+ TEST_UTIL.createTable(TABLENAME, FAMILIES); + HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + int countOfRegions = TEST_UTIL.createMultiRegions(t, FAMILIES[0]); + TEST_UTIL.waitUntilAllRegionsAssigned(countOfRegions); + HTable table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + TEST_UTIL.loadTable(table, FAMILIES[0]); + table.close(); + } + + @AfterClass public static void afterAllTests() throws IOException { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before public void setup() throws IOException { + TEST_UTIL.ensureSomeRegionServersAvailable(2); + } + + /** + * Interrupt processing of the ROOT server's shutdown, once, so it gets put on delay queue. + */ + static class PostponeShutdownProcessing implements RegionServerOperationListener { + // Set of what we've delayed so we don't do repeated delays. + private final Set<ProcessServerShutdown> postponed = + new CopyOnWriteArraySet<ProcessServerShutdown>(); + private volatile boolean done = false; // volatile: written in processed(), polled from the test method's wait loop + private final HServerAddress rootServerAddress; + private final HMaster master; + + PostponeShutdownProcessing(final HMaster master, + final HServerAddress rootServerAddress) { + this.master = master; + this.rootServerAddress = rootServerAddress; + } + + @Override + public boolean process(final RegionServerOperation op) throws IOException { + // If this is a regionserver shutdown and it's of the root server, then we want to + // delay the processing of the shutdown + boolean result = true; + if (op instanceof ProcessServerShutdown) { + ProcessServerShutdown pss = (ProcessServerShutdown)op; + if (pss.getDeadServerAddress().equals(this.rootServerAddress)) { + // Don't postpone more than once. + if (!this.postponed.contains(pss)) { + this.postponed.add(pss); + Assert.assertNull(this.master.getRegionManager().getRootRegionLocation()); + pss.setDelay(1 * 1000); + // Return false. This will add this op to the delayed queue. 
+ result = false; + } + } + } + return result; + } + + @Override + public boolean process(HServerInfo serverInfo, HMsg incomingMsg) { + return true; + } + + @Override + public void processed(RegionServerOperation op) { + if (op instanceof ProcessServerShutdown) { + ProcessServerShutdown pss = (ProcessServerShutdown)op; + if (pss.getDeadServerAddress().equals(this.rootServerAddress)) { + this.done = true; + } + } + } + + public boolean isDone() { + return this.done; + } + } + + /** + * If the split of the log for the regionserver hosting ROOT doesn't go off + * smoothly, if the process server shutdown gets added to the delayed queue + * of events to process, then ROOT was not being allocated, ever. + * @see HBASE-2707 + */ + @Test (timeout=300000) public void testROOTDeployedThoughProblemSplittingLog() + throws Exception { + LOG.info("Running testROOTDeployedThoughProblemSplittingLog"); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster master = cluster.getMaster(); + byte [] rootRegion = Bytes.toBytes("-ROOT-,,0"); + int rootIndex = cluster.getServerWith(rootRegion); + final HRegionServer rootHRS = cluster.getRegionServer(rootIndex); + + // Add our RegionServerOperationsListener + PostponeShutdownProcessing listener = new PostponeShutdownProcessing(master, + rootHRS.getHServerInfo().getServerAddress()); + master.getRegionServerOperationQueue(). + registerRegionServerOperationListener(listener); + try { + // Now abort the server carrying ROOT. + cluster.abortRegionServer(rootIndex); + + // Wait for processing of the shutdown server. + while(!listener.isDone()) Threads.sleep(100); + master.getRegionManager().waitForRootRegionLocation(); + } finally { + master.getRegionServerOperationQueue(). + unregisterRegionServerOperationListener(listener); + } + } +}