HBASE-4880 Region is in service before OpenRegionHandler completes, which may cause data loss

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1212251 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-12-09 05:59:34 +00:00
parent abf897ad42
commit 307ca7c68b
5 changed files with 29 additions and 20 deletions

View File

@ -38,11 +38,11 @@ import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.SortedMap; import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
@ -56,11 +56,11 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.hbase.ClockOutOfSyncException; import org.apache.hadoop.hbase.ClockOutOfSyncException;
import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
import org.apache.hadoop.hbase.HDFSBlocksDistribution; import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerAddress;
@ -77,7 +77,6 @@ import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.UnknownRowLockException; import org.apache.hadoop.hbase.UnknownRowLockException;
import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.RootLocationEditor; import org.apache.hadoop.hbase.catalog.RootLocationEditor;
@ -99,8 +98,8 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType; import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.WritableByteArrayComparable;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.WritableByteArrayComparable;
import org.apache.hadoop.hbase.io.hfile.BlockCache; import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheColumnFamilySummary; import org.apache.hadoop.hbase.io.hfile.BlockCacheColumnFamilySummary;
import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheConfig;
@ -126,8 +125,8 @@ import org.apache.hadoop.hbase.regionserver.handler.OpenRootHandler;
import org.apache.hadoop.hbase.regionserver.metrics.RegionServerDynamicMetrics; import org.apache.hadoop.hbase.regionserver.metrics.RegionServerDynamicMetrics;
import org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics; import org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.StoreMetricType; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.StoreMetricType;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.security.User;
@ -142,14 +141,15 @@ import org.apache.hadoop.hbase.util.Sleeper;
import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.VersionInfo; import org.apache.hadoop.hbase.util.VersionInfo;
import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
import org.apache.hadoop.hbase.zookeeper.SchemaChangeTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.zookeeper.SchemaChangeTracker;
import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.DNS; import org.apache.hadoop.net.DNS;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.ObjectMapper;
@ -1589,9 +1589,6 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
getCompactionRequester().requestCompaction(r, s, "Opening Region"); getCompactionRequester().requestCompaction(r, s, "Opening Region");
} }
} }
// Add to online regions if all above was successful.
addToOnlineRegions(r);
// Update ZK, ROOT or META // Update ZK, ROOT or META
if (r.getRegionInfo().isRootRegion()) { if (r.getRegionInfo().isRootRegion()) {
RootLocationEditor.setRootLocation(getZooKeeper(), RootLocationEditor.setRootLocation(getZooKeeper(),

View File

@ -57,6 +57,7 @@ public interface RegionServerServices extends OnlineRegions {
/** /**
* Tasks to perform after region open to complete deploy of region on * Tasks to perform after region open to complete deploy of region on
* regionserver * regionserver
*
* @param r Region to open. * @param r Region to open.
* @param ct Instance of {@link CatalogTracker} * @param ct Instance of {@link CatalogTracker}
* @param daughter True if this is daughter of a split * @param daughter True if this is daughter of a split

View File

@ -41,8 +41,8 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.io.Reference.Range; import org.apache.hadoop.hbase.io.Reference.Range;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CancelableProgressable; import org.apache.hadoop.hbase.util.CancelableProgressable;
@ -361,7 +361,10 @@ public class SplitTransaction {
try { try {
// add 2nd daughter first (see HBASE-4335) // add 2nd daughter first (see HBASE-4335)
services.postOpenDeployTasks(b, server.getCatalogTracker(), true); services.postOpenDeployTasks(b, server.getCatalogTracker(), true);
// Should add it to OnlineRegions
services.addToOnlineRegions(b);
services.postOpenDeployTasks(a, server.getCatalogTracker(), true); services.postOpenDeployTasks(a, server.getCatalogTracker(), true);
services.addToOnlineRegions(a);
} catch (KeeperException ke) { } catch (KeeperException ke) {
throw new IOException(ke); throw new IOException(ke);
} }

View File

@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.Server;
@ -110,10 +109,11 @@ public class OpenRegionHandler extends EventHandler {
tryTransitionToFailedOpen(regionInfo); tryTransitionToFailedOpen(regionInfo);
return; return;
} }
boolean failed = true; boolean failed = true;
if (tickleOpening("post_region_open")) { if (tickleOpening("post_region_open")) {
if (updateMeta(region)) failed = false; if (updateMeta(region)) {
failed = false;
}
} }
if (failed || this.server.isStopped() || if (failed || this.server.isStopped() ||
this.rsServices.isStopping()) { this.rsServices.isStopping()) {
@ -132,6 +132,8 @@ public class OpenRegionHandler extends EventHandler {
cleanupFailedOpen(region); cleanupFailedOpen(region);
return; return;
} }
// Successful region open, and add it to OnlineRegions
this.rsServices.addToOnlineRegions(region);
// Done! Successful region open // Done! Successful region open
LOG.debug("Opened " + name + " on server:" + LOG.debug("Opened " + name + " on server:" +
@ -212,9 +214,10 @@ public class OpenRegionHandler extends EventHandler {
} }
/** /**
* Thread to run region post open tasks. Call {@link #getException()} after * Thread to run region post open tasks. Call {@link #getException()} after
* the thread finishes to check for exceptions running * the thread finishes to check for exceptions running
* {@link RegionServerServices#postOpenDeployTasks(HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker, boolean)}. * {@link RegionServerServices#postOpenDeployTasks(HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker, boolean)}
* .
*/ */
static class PostOpenDeployTasksThread extends Thread { static class PostOpenDeployTasksThread extends Thread {
private Exception exception = null; private Exception exception = null;
@ -348,7 +351,6 @@ public class OpenRegionHandler extends EventHandler {
private void cleanupFailedOpen(final HRegion region) throws IOException { private void cleanupFailedOpen(final HRegion region) throws IOException {
if (region != null) region.close(); if (region != null) region.close();
this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName());
} }
/** /**

View File

@ -17,9 +17,14 @@
*/ */
package org.apache.hadoop.hbase.regionserver; package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HConnectionManager;
@ -32,9 +37,6 @@ import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
@Category(LargeTests.class) @Category(LargeTests.class)
public class TestEndToEndSplitTransaction { public class TestEndToEndSplitTransaction {
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
@ -85,6 +87,8 @@ public class TestEndToEndSplitTransaction {
// note that this replicates some code from SplitTransaction // note that this replicates some code from SplitTransaction
// 2nd daughter first // 2nd daughter first
server.postOpenDeployTasks(regions.getSecond(), server.getCatalogTracker(), true); server.postOpenDeployTasks(regions.getSecond(), server.getCatalogTracker(), true);
// Add to online regions
server.addToOnlineRegions(regions.getSecond());
// THIS is the crucial point: // THIS is the crucial point:
// the 2nd daughter was added, so querying before the split key should fail. // the 2nd daughter was added, so querying before the split key should fail.
assertFalse(test(con, tableName, firstRow, server)); assertFalse(test(con, tableName, firstRow, server));
@ -93,6 +97,8 @@ public class TestEndToEndSplitTransaction {
// first daughter second // first daughter second
server.postOpenDeployTasks(regions.getFirst(), server.getCatalogTracker(), true); server.postOpenDeployTasks(regions.getFirst(), server.getCatalogTracker(), true);
// Add to online regions
server.addToOnlineRegions(regions.getFirst());
assertTrue(test(con, tableName, firstRow, server)); assertTrue(test(con, tableName, firstRow, server));
assertTrue(test(con, tableName, lastRow, server)); assertTrue(test(con, tableName, lastRow, server));