HBASE-4880 Region is on service before openRegionHandler completes, may cause data loss

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1212251 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2011-12-09 05:59:34 +00:00
parent abf897ad42
commit 307ca7c68b
5 changed files with 29 additions and 20 deletions
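In short, the fix moves the addToOnlineRegions() call out of HRegionServer.postOpenDeployTasks() and into its callers (OpenRegionHandler, SplitTransaction, and TestEndToEndSplitTransaction), so a region is published for client requests only after its ZK, ROOT, or META updates have succeeded. A rough caller-side sketch of the new ordering, using the types that appear in the diffs below; the class and method names OpenFlowSketch and deployRegion are illustrative, not part of the commit:

import java.io.IOException;

import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.zookeeper.KeeperException;

/** Illustrative only; shows the deploy order callers must follow after this change. */
final class OpenFlowSketch {
  static void deployRegion(RegionServerServices services, CatalogTracker ct,
      HRegion region, boolean daughter) throws IOException, KeeperException {
    // 1. Finish the catalog work first: edit META (and ROOT/ZK for catalog
    //    regions). The region is not yet registered, so no client can reach it.
    services.postOpenDeployTasks(region, ct, daughter);
    // 2. Publish the region only once those edits succeed; a failure before
    //    this point leaves nothing to roll back in the online-regions map.
    services.addToOnlineRegions(region);
  }
}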

HRegionServer.java

@ -38,11 +38,11 @@ import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicBoolean;
@ -56,11 +56,11 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.hbase.ClockOutOfSyncException;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
@ -77,7 +77,6 @@ import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.UnknownRowLockException;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.RootLocationEditor;
@ -99,8 +98,8 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.WritableByteArrayComparable;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.WritableByteArrayComparable;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheColumnFamilySummary;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
@ -126,8 +125,8 @@ import org.apache.hadoop.hbase.regionserver.handler.OpenRootHandler;
import org.apache.hadoop.hbase.regionserver.metrics.RegionServerDynamicMetrics;
import org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.StoreMetricType;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.security.User;
@ -142,14 +141,15 @@ import org.apache.hadoop.hbase.util.Sleeper;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
import org.apache.hadoop.hbase.zookeeper.SchemaChangeTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.zookeeper.SchemaChangeTracker;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
import org.codehaus.jackson.map.ObjectMapper;
@ -1589,9 +1589,6 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
getCompactionRequester().requestCompaction(r, s, "Opening Region");
}
}
// Add to online regions if all above was successful.
addToOnlineRegions(r);
// Update ZK, ROOT or META
if (r.getRegionInfo().isRootRegion()) {
RootLocationEditor.setRootLocation(getZooKeeper(),

RegionServerServices.java

@ -57,6 +57,7 @@ public interface RegionServerServices extends OnlineRegions {
/**
* Tasks to perform after region open to complete deploy of region on
* regionserver
*
* @param r Region to open.
* @param ct Instance of {@link CatalogTracker}
* @param daughter True if this is daughter of a split

SplitTransaction.java

@ -41,8 +41,8 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.io.Reference.Range;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CancelableProgressable;
@ -361,7 +361,10 @@ public class SplitTransaction {
try {
// add 2nd daughter first (see HBASE-4335)
services.postOpenDeployTasks(b, server.getCatalogTracker(), true);
// Should add it to OnlineRegions
services.addToOnlineRegions(b);
services.postOpenDeployTasks(a, server.getCatalogTracker(), true);
services.addToOnlineRegions(a);
} catch (KeeperException ke) {
throw new IOException(ke);
}

OpenRegionHandler.java

@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Server;
@ -110,10 +109,11 @@ public class OpenRegionHandler extends EventHandler {
tryTransitionToFailedOpen(regionInfo);
return;
}
boolean failed = true;
if (tickleOpening("post_region_open")) {
if (updateMeta(region)) failed = false;
if (updateMeta(region)) {
failed = false;
}
}
if (failed || this.server.isStopped() ||
this.rsServices.isStopping()) {
@ -132,6 +132,8 @@ public class OpenRegionHandler extends EventHandler {
cleanupFailedOpen(region);
return;
}
// Successful region open, and add it to OnlineRegions
this.rsServices.addToOnlineRegions(region);
// Done! Successful region open
LOG.debug("Opened " + name + " on server:" +
@ -212,9 +214,10 @@ public class OpenRegionHandler extends EventHandler {
}
/**
* Thread to run region post open tasks. Call {@link #getException()} after
* Thread to run region post open tasks. Call {@link #getException()} after
* the thread finishes to check for exceptions running
* {@link RegionServerServices#postOpenDeployTasks(HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker, boolean)}.
* {@link RegionServerServices#postOpenDeployTasks(HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker, boolean)}
* .
*/
static class PostOpenDeployTasksThread extends Thread {
private Exception exception = null;
@ -348,7 +351,6 @@ public class OpenRegionHandler extends EventHandler {
private void cleanupFailedOpen(final HRegion region) throws IOException {
if (region != null) region.close();
this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName());
}
/**
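Because a region is now registered only after a fully successful open (the addToOnlineRegions() call added above), a failed open has nothing to unregister, which is why cleanupFailedOpen() drops its removeFromOnlineRegions() call. A minimal sketch of that failure handling, under the same assumptions as the sketch near the top; FailedOpenSketch and openOrCleanup are hypothetical names, not committed code:

import java.io.IOException;

import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.zookeeper.KeeperException;

/** Illustrative only; not the committed handler code. */
final class FailedOpenSketch {
  static void openOrCleanup(RegionServerServices services, CatalogTracker ct, HRegion region)
      throws IOException {
    try {
      // Catalog edits happen while the region is still invisible to clients.
      services.postOpenDeployTasks(region, ct, false);
    } catch (KeeperException ke) {
      // Failed before the region was ever online: closing it is the whole cleanup.
      region.close();
      throw new IOException(ke);
    }
    // Reached only on success; this is the first moment clients can see the region.
    services.addToOnlineRegions(region);
  }
}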

TestEndToEndSplitTransaction.java

@ -17,9 +17,14 @@
*/
package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
@ -32,9 +37,6 @@ import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
@Category(LargeTests.class)
public class TestEndToEndSplitTransaction {
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
@ -85,6 +87,8 @@ public class TestEndToEndSplitTransaction {
// note that this replicates some code from SplitTransaction
// 2nd daughter first
server.postOpenDeployTasks(regions.getSecond(), server.getCatalogTracker(), true);
// Add to online regions
server.addToOnlineRegions(regions.getSecond());
// THIS is the crucial point:
// the 2nd daughter was added, so querying before the split key should fail.
assertFalse(test(con, tableName, firstRow, server));
@ -93,6 +97,8 @@ public class TestEndToEndSplitTransaction {
// first daughter second
server.postOpenDeployTasks(regions.getFirst(), server.getCatalogTracker(), true);
// Add to online regions
server.addToOnlineRegions(regions.getFirst());
assertTrue(test(con, tableName, firstRow, server));
assertTrue(test(con, tableName, lastRow, server));