HBASE-3872 Hole in split transaction rollback; edits to .META. need to be rolled back even if it seems like they didn't make it
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1146526 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
53b5b9005e
commit
7f10f29425
|
@ -409,6 +409,8 @@ Release 0.90.4 - Unreleased
|
|||
HBASE-4033 The shutdown RegionServer could be added to
|
||||
AssignmentManager.servers again (Jieshan Bean)
|
||||
HBASE-4088 npes in server shutdown
|
||||
HBASE-3872 Hole in split transaction rollback; edits to .META. need
|
||||
to be rolled back even if it seems like they didn't make it
|
||||
|
||||
IMPROVEMENT
|
||||
HBASE-3882 hbase-config.sh needs to be updated so it can auto-detects the
|
||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.client.Result;
|
|||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.Store;
|
||||
import org.apache.hadoop.hbase.regionserver.StoreFile;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
|
||||
|
@ -161,16 +162,15 @@ class CatalogJanitor extends Chore {
|
|||
* the filesystem.
|
||||
* @throws IOException
|
||||
*/
|
||||
boolean cleanParent(final HRegionInfo parent,
|
||||
Result rowContent)
|
||||
boolean cleanParent(final HRegionInfo parent, Result rowContent)
|
||||
throws IOException {
|
||||
boolean result = false;
|
||||
// Run checks on each daughter split.
|
||||
boolean hasReferencesA =
|
||||
Pair<Boolean, Boolean> a =
|
||||
checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER);
|
||||
boolean hasReferencesB =
|
||||
Pair<Boolean, Boolean> b =
|
||||
checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER);
|
||||
if (!hasReferencesA && !hasReferencesB) {
|
||||
if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) {
|
||||
LOG.debug("Deleting region " + parent.getRegionNameAsString() +
|
||||
" because daughter splits no longer hold references");
|
||||
// This latter regionOffline should not be necessary but is done for now
|
||||
|
@ -197,14 +197,26 @@ class CatalogJanitor extends Chore {
|
|||
* @param parent
|
||||
* @param rowContent
|
||||
* @param qualifier
|
||||
* @return True if this daughter still has references to the parent.
|
||||
* @return A pair where the first boolean says whether or not the daughter
|
||||
* region directory exists in the filesystem and then the second boolean says
|
||||
* whether the daughter has references to the parent.
|
||||
* @throws IOException
|
||||
*/
|
||||
boolean checkDaughter(final HRegionInfo parent,
|
||||
Pair<Boolean, Boolean> checkDaughter(final HRegionInfo parent,
|
||||
final Result rowContent, final byte [] qualifier)
|
||||
throws IOException {
|
||||
HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier);
|
||||
return hasReferences(parent, rowContent, hri, qualifier);
|
||||
Pair<Boolean, Boolean> result =
|
||||
checkDaughterInFs(parent, rowContent, hri, qualifier);
|
||||
if (result.getFirst() && !result.getSecond()) {
|
||||
// Remove daughter from the parent IFF the daughter region exists in FS.
|
||||
// If there is no daughter region in the filesystem, must be because of
|
||||
// a failed split. The ServerShutdownHandler will do the fixup. Don't
|
||||
// do any deletes in here that could intefere with ServerShutdownHandler
|
||||
// fixup
|
||||
removeDaughterFromParent(parent, hri, qualifier);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -242,23 +254,35 @@ class CatalogJanitor extends Chore {
|
|||
/**
|
||||
* Checks if a daughter region -- either splitA or splitB -- still holds
|
||||
* references to parent. If not, removes reference to the split from
|
||||
* the parent meta region row so we don't check it any more.
|
||||
* the parent meta region row so we don't check it any more. Also checks
|
||||
* daughter region exists in the filesytem.
|
||||
* @param parent Parent region name.
|
||||
* @param rowContent Keyed content of the parent row in meta region.
|
||||
* @param split Which column family.
|
||||
* @param qualifier Which of the daughters to look at, splitA or splitB.
|
||||
* @return True if still has references to parent.
|
||||
* @return A pair where the first boolean says whether or not the daughter
|
||||
* region directory exists in the filesystem and then the second boolean says
|
||||
* whether the daughter has references to the parent.
|
||||
* @throws IOException
|
||||
*/
|
||||
boolean hasReferences(final HRegionInfo parent,
|
||||
Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent,
|
||||
final Result rowContent, final HRegionInfo split,
|
||||
final byte [] qualifier)
|
||||
throws IOException {
|
||||
boolean result = false;
|
||||
if (split == null) return result;
|
||||
boolean references = false;
|
||||
boolean exists = false;
|
||||
if (split == null) {
|
||||
return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
|
||||
}
|
||||
FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
|
||||
Path rootdir = this.services.getMasterFileSystem().getRootDir();
|
||||
Path tabledir = new Path(rootdir, split.getTableNameAsString());
|
||||
Path regiondir = new Path(tabledir, split.getEncodedName());
|
||||
exists = fs.exists(regiondir);
|
||||
if (!exists) {
|
||||
LOG.warn("Daughter regiondir does not exist: " + regiondir.toString());
|
||||
return new Pair<Boolean, Boolean>(exists, Boolean.FALSE);
|
||||
}
|
||||
HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName());
|
||||
|
||||
for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
|
||||
|
@ -275,18 +299,16 @@ class CatalogJanitor extends Chore {
|
|||
);
|
||||
|
||||
if (ps != null && ps.length > 0) {
|
||||
result = true;
|
||||
references = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!result) {
|
||||
removeDaughterFromParent(parent, split, qualifier);
|
||||
}
|
||||
return result;
|
||||
return new Pair<Boolean, Boolean>(Boolean.valueOf(exists),
|
||||
Boolean.valueOf(references));
|
||||
}
|
||||
|
||||
private HTableDescriptor getTableDescriptor(byte[] tableName)
|
||||
throws TableExistsException, FileNotFoundException, IOException {
|
||||
return this.services.getTableDescriptors().get(Bytes.toString(tableName));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -237,11 +237,12 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
*/
|
||||
static void fixupDaughters(final Result result,
|
||||
final AssignmentManager assignmentManager,
|
||||
final CatalogTracker catalogTracker) throws IOException {
|
||||
final CatalogTracker catalogTracker)
|
||||
throws IOException {
|
||||
fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
|
||||
catalogTracker);
|
||||
catalogTracker);
|
||||
fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
|
||||
catalogTracker);
|
||||
catalogTracker);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -282,8 +283,8 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
}
|
||||
|
||||
/**
|
||||
* Look for presence of the daughter OR of a split of the daughter. Daughter
|
||||
* could have been split over on regionserver before a run of the
|
||||
* Look for presence of the daughter OR of a split of the daughter in .META.
|
||||
* Daughter could have been split over on regionserver before a run of the
|
||||
* catalogJanitor had chance to clear reference from parent.
|
||||
* @param daughter Daughter region to search for.
|
||||
* @throws IOException
|
||||
|
@ -350,4 +351,4 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1380,7 +1380,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
|||
public void postOpenDeployTasks(final HRegion r, final CatalogTracker ct,
|
||||
final boolean daughter)
|
||||
throws KeeperException, IOException {
|
||||
LOG.info("HRS.PostOpenDeployTasks");
|
||||
LOG.info("Post open deploy tasks for region=" + r.getRegionNameAsString() +
|
||||
", daughter=" + daughter);
|
||||
// Do checks to see if we need to compact (references or too many files)
|
||||
for (Store s : r.getStores().values()) {
|
||||
if (s.hasReferences() || s.needsCompaction()) {
|
||||
|
@ -1393,32 +1394,23 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
|||
LOG.info("addToOnlineRegions is done" + r.getRegionInfo());
|
||||
// Update ZK, ROOT or META
|
||||
if (r.getRegionInfo().isRootRegion()) {
|
||||
|
||||
LOG.info("setRootLocation");
|
||||
RootLocationEditor.setRootLocation(getZooKeeper(),
|
||||
this.serverNameFromMasterPOV);
|
||||
} else if (r.getRegionInfo().isMetaRegion()) {
|
||||
LOG.info("updateMetaLocation");
|
||||
|
||||
MetaEditor.updateMetaLocation(ct, r.getRegionInfo(),
|
||||
this.serverNameFromMasterPOV);
|
||||
} else {
|
||||
LOG.info("updateMetaLocation 111");
|
||||
|
||||
if (daughter) {
|
||||
LOG.info("updateMetaLocation 22");
|
||||
|
||||
// If daughter of a split, update whole row, not just location.
|
||||
MetaEditor.addDaughter(ct, r.getRegionInfo(),
|
||||
this.serverNameFromMasterPOV);
|
||||
} else {
|
||||
LOG.info("updateMetaLocation 33");
|
||||
|
||||
MetaEditor.updateRegionLocation(ct, r.getRegionInfo(),
|
||||
this.serverNameFromMasterPOV);
|
||||
}
|
||||
}
|
||||
LOG.info("END HRS.PostOpenDeployTasks");
|
||||
LOG.info("Done with post open deploy taks for region=" +
|
||||
r.getRegionNameAsString() + ", daughter=" + daughter);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.io.IOException;
|
|||
import org.apache.hadoop.hbase.catalog.CatalogTracker;
|
||||
import org.apache.hadoop.hbase.ipc.HBaseRpcMetrics;
|
||||
import org.apache.hadoop.hbase.regionserver.wal.HLog;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import java.util.Set;
|
||||
|
||||
|
|
|
@ -62,15 +62,20 @@ class SplitRequest implements Runnable {
|
|||
st.execute(this.server, this.server);
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
LOG.info("Running rollback of failed split of " + parent + "; "
|
||||
+ e.getMessage());
|
||||
st.rollback(this.server, this.server);
|
||||
LOG.info("Successful rollback of failed split of " + parent);
|
||||
LOG.info("Running rollback/cleanup of failed split of " +
|
||||
parent.getRegionNameAsString() + "; " + e.getMessage());
|
||||
if (st.rollback(this.server, this.server)) {
|
||||
LOG.info("Successful rollback of failed split of " +
|
||||
parent.getRegionNameAsString());
|
||||
} else {
|
||||
this.server.abort("Abort; we got an error after point-of-no-return");
|
||||
}
|
||||
} catch (RuntimeException ee) {
|
||||
// If failed rollback, kill server to avoid having a hole in table.
|
||||
LOG.info("Failed rollback of failed split of "
|
||||
+ parent.getRegionNameAsString() + " -- aborting server", ee);
|
||||
this.server.abort("Failed split");
|
||||
String msg = "Failed rollback of failed split of " +
|
||||
parent.getRegionNameAsString() + " -- aborting server";
|
||||
// If failed rollback, kill this server to avoid having a hole in table.
|
||||
LOG.info(msg, ee);
|
||||
this.server.abort(msg);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -60,18 +60,18 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
|||
|
||||
/**
|
||||
* Executes region split as a "transaction". Call {@link #prepare()} to setup
|
||||
* the transaction, {@link #execute(OnlineRegions)} to run the transaction and
|
||||
* {@link #rollback(OnlineRegions)} to cleanup if execute fails.
|
||||
* the transaction, {@link #execute(Server, RegionServerServices)} to run the
|
||||
* transaction and {@link #rollback(OnlineRegions)} to cleanup if execute fails.
|
||||
*
|
||||
* <p>Here is an example of how you would use this class:
|
||||
* <pre>
|
||||
* SplitTransaction st = new SplitTransaction(this.conf, parent, midKey)
|
||||
* if (!st.prepare()) return;
|
||||
* try {
|
||||
* st.execute(myOnlineRegions);
|
||||
* st.execute(server, services);
|
||||
* } catch (IOException ioe) {
|
||||
* try {
|
||||
* st.rollback(myOnlineRegions);
|
||||
* st.rollback(server, services);
|
||||
* return;
|
||||
* } catch (RuntimeException e) {
|
||||
* myAbortable.abort("Failed split, abort");
|
||||
|
@ -101,7 +101,9 @@ public class SplitTransaction {
|
|||
private final byte [] splitrow;
|
||||
|
||||
/**
|
||||
* Types to add to the transaction journal
|
||||
* Types to add to the transaction journal.
|
||||
* Each enum is a step in the split transaction. Used to figure how much
|
||||
* we need to rollback.
|
||||
*/
|
||||
enum JournalEntry {
|
||||
/**
|
||||
|
@ -127,7 +129,13 @@ public class SplitTransaction {
|
|||
/**
|
||||
* Started in on the creation of the second daughter region.
|
||||
*/
|
||||
STARTED_REGION_B_CREATION
|
||||
STARTED_REGION_B_CREATION,
|
||||
/**
|
||||
* Point of no return.
|
||||
* If we got here, then transaction is not recoverable other than by
|
||||
* crashing out the regionserver.
|
||||
*/
|
||||
PONR
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -137,7 +145,7 @@ public class SplitTransaction {
|
|||
|
||||
/**
|
||||
* Constructor
|
||||
* @param services So we can online new servces. If null, we'll skip onlining
|
||||
* @param services So we can online new regions. If null, we'll skip onlining
|
||||
* (Useful testing).
|
||||
* @param c Configuration to use running split
|
||||
* @param r Region to split
|
||||
|
@ -156,7 +164,7 @@ public class SplitTransaction {
|
|||
*/
|
||||
public boolean prepare() {
|
||||
if (this.parent.isClosed() || this.parent.isClosing()) return false;
|
||||
// Split key can be false if this region is unsplittable; i.e. has refs.
|
||||
// Split key can be null if this region is unsplittable; i.e. has refs.
|
||||
if (this.splitrow == null) return false;
|
||||
HRegionInfo hri = this.parent.getRegionInfo();
|
||||
parent.prepareToSplit();
|
||||
|
@ -196,13 +204,14 @@ public class SplitTransaction {
|
|||
|
||||
/**
|
||||
* Run the transaction.
|
||||
* @param server Hosting server instance.
|
||||
* @param server Hosting server instance. Can be null when testing (won't try
|
||||
* and update in zk if a null server)
|
||||
* @param services Used to online/offline regions.
|
||||
* @throws IOException If thrown, transaction failed. Call {@link #rollback(OnlineRegions)}
|
||||
* @throws IOException If thrown, transaction failed. Call {@link #rollback(Server, RegionServerServices)}
|
||||
* @return Regions created
|
||||
* @throws KeeperException
|
||||
* @throws NodeExistsException
|
||||
* @see #rollback(OnlineRegions)
|
||||
* @see #rollback(Server, RegionServerServices)
|
||||
*/
|
||||
public PairOfSameType<HRegion> execute(final Server server,
|
||||
final RegionServerServices services)
|
||||
|
@ -227,7 +236,7 @@ public class SplitTransaction {
|
|||
this.fileSplitTimeout);
|
||||
|
||||
// Set ephemeral SPLITTING znode up in zk. Mocked servers sometimes don't
|
||||
// have zookeeper so don't do zk stuff if zookeeper is null
|
||||
// have zookeeper so don't do zk stuff if server or zookeeper is null
|
||||
if (server != null && server.getZooKeeper() != null) {
|
||||
try {
|
||||
this.znodeVersion = createNodeSplitting(server.getZooKeeper(),
|
||||
|
@ -259,13 +268,13 @@ public class SplitTransaction {
|
|||
}
|
||||
this.journal.add(JournalEntry.OFFLINED_PARENT);
|
||||
|
||||
// TODO: If the below were multithreaded would we complete steps in less
|
||||
// elapsed time? St.Ack 20100920
|
||||
|
||||
splitStoreFiles(this.splitdir, hstoreFilesToSplit);
|
||||
// TODO: If splitStoreFiles were multithreaded would we complete steps in
|
||||
// less elapsed time? St.Ack 20100920
|
||||
//
|
||||
// splitStoreFiles creates daughter region dirs under the parent splits dir
|
||||
// Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
|
||||
// clean this up.
|
||||
splitStoreFiles(this.splitdir, hstoreFilesToSplit);
|
||||
|
||||
// Log to the journal that we are creating region A, the first daughter
|
||||
// region. We could fail halfway through. If we do, we could have left
|
||||
|
@ -278,27 +287,38 @@ public class SplitTransaction {
|
|||
this.journal.add(JournalEntry.STARTED_REGION_B_CREATION);
|
||||
HRegion b = createDaughterRegion(this.hri_b, this.parent.rsServices);
|
||||
|
||||
// Edit parent in meta
|
||||
// Edit parent in meta. Offlines parent region and adds splita and splitb.
|
||||
if (!testing) {
|
||||
MetaEditor.offlineParentInMeta(server.getCatalogTracker(),
|
||||
this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo());
|
||||
}
|
||||
|
||||
// This is the point of no return. We are committed to the split now. We
|
||||
// have still the daughter regions to open but meta has been changed.
|
||||
// If we fail from here on out, we cannot rollback so, we'll just abort.
|
||||
if (!testing) {
|
||||
// This is the point of no return. Adding subsequent edits to .META. as we
|
||||
// do below when we do the daugther opens adding each to .META. can fail in
|
||||
// various interesting ways the most interesting of which is a timeout
|
||||
// BUT the edits all go through (See HBASE-3872). IF we reach the POWR
|
||||
// then subsequent failures need to crash out this regionserver; the
|
||||
// server shutdown processing should be able to fix-up the incomplete split.
|
||||
this.journal.add(JournalEntry.PONR);
|
||||
// Open daughters in parallel.
|
||||
DaughterOpener aOpener = new DaughterOpener(server, services, a);
|
||||
DaughterOpener bOpener = new DaughterOpener(server, services, b);
|
||||
aOpener.start();
|
||||
bOpener.start();
|
||||
try {
|
||||
aOpener.join();
|
||||
bOpener.join();
|
||||
} catch (InterruptedException e) {
|
||||
server.abort("Exception running daughter opens", e);
|
||||
}
|
||||
DaughterOpener aOpener = new DaughterOpener(server, services, a);
|
||||
DaughterOpener bOpener = new DaughterOpener(server, services, b);
|
||||
aOpener.start();
|
||||
bOpener.start();
|
||||
try {
|
||||
aOpener.join();
|
||||
bOpener.join();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new IOException("Interrupted " + e.getMessage());
|
||||
}
|
||||
if (aOpener.getException() != null) {
|
||||
throw new IOException("Failed " +
|
||||
aOpener.getName(), aOpener.getException());
|
||||
}
|
||||
if (bOpener.getException() != null) {
|
||||
throw new IOException("Failed " +
|
||||
bOpener.Name(), bOpener.getException());
|
||||
}
|
||||
|
||||
// Tell master about split by updating zk. If we fail, abort.
|
||||
|
@ -328,12 +348,10 @@ public class SplitTransaction {
|
|||
if (e instanceof InterruptedException) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
server.abort("Failed telling master about split", e);
|
||||
throw new IOException("Failed telling master about split", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Coprocessor callback
|
||||
if (this.parent.getCoprocessorHost() != null) {
|
||||
this.parent.getCoprocessorHost().postSplit(a,b);
|
||||
|
@ -345,10 +363,15 @@ public class SplitTransaction {
|
|||
return new PairOfSameType<HRegion>(a, b);
|
||||
}
|
||||
|
||||
/*
|
||||
* Open daughter region in its own thread.
|
||||
* If we fail, abort this hosting server.
|
||||
*/
|
||||
class DaughterOpener extends Thread {
|
||||
private final RegionServerServices services;
|
||||
private final Server server;
|
||||
private final HRegion r;
|
||||
private Throwable t = null;
|
||||
|
||||
DaughterOpener(final Server s, final RegionServerServices services,
|
||||
final HRegion r) {
|
||||
|
@ -359,13 +382,20 @@ public class SplitTransaction {
|
|||
this.r = r;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Null if open succeeded else exception that causes us fail open.
|
||||
* Call it after this thread exits else you may get wrong view on result.
|
||||
*/
|
||||
Throwable getException() {
|
||||
return this.t;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
openDaughterRegion(this.server, this.services, r);
|
||||
} catch (Throwable t) {
|
||||
this.server.abort("Failed open of daughter " +
|
||||
this.r.getRegionInfo().getRegionNameAsString(), t);
|
||||
this.t = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -373,7 +403,7 @@ public class SplitTransaction {
|
|||
/**
|
||||
* Open daughter regions, add them to online list and update meta.
|
||||
* @param server
|
||||
* @param services
|
||||
* @param services Can be null when testing.
|
||||
* @param daughter
|
||||
* @throws IOException
|
||||
* @throws KeeperException
|
||||
|
@ -381,20 +411,22 @@ public class SplitTransaction {
|
|||
void openDaughterRegion(final Server server,
|
||||
final RegionServerServices services, final HRegion daughter)
|
||||
throws IOException, KeeperException {
|
||||
if (server.isStopped() || services.isStopping()) {
|
||||
boolean stopping = services != null && services.isStopping();
|
||||
if (server.isStopped() || stopping) {
|
||||
MetaEditor.addDaughter(server.getCatalogTracker(),
|
||||
daughter.getRegionInfo(), null);
|
||||
LOG.info("Not opening daughter " +
|
||||
daughter.getRegionInfo().getRegionNameAsString() +
|
||||
" because stopping=" + services.isStopping() + ", stopped=" +
|
||||
server.isStopped());
|
||||
" because stopping=" + stopping + ", stopped=" + server.isStopped());
|
||||
return;
|
||||
}
|
||||
HRegionInfo hri = daughter.getRegionInfo();
|
||||
LoggingProgressable reporter =
|
||||
new LoggingProgressable(hri, server.getConfiguration());
|
||||
HRegion r = daughter.openHRegion(reporter);
|
||||
services.postOpenDeployTasks(r, server.getCatalogTracker(), true);
|
||||
if (services != null) {
|
||||
services.postOpenDeployTasks(r, server.getCatalogTracker(), true);
|
||||
}
|
||||
}
|
||||
|
||||
static class LoggingProgressable implements CancelableProgressable {
|
||||
|
@ -598,16 +630,19 @@ public class SplitTransaction {
|
|||
}
|
||||
|
||||
/**
|
||||
* @param or Object that can online/offline parent region. Can be passed null
|
||||
* by unit tests.
|
||||
* @return The region we were splitting
|
||||
* @param server Hosting server instance (May be null when testing).
|
||||
* @param services
|
||||
* @throws IOException If thrown, rollback failed. Take drastic action.
|
||||
* @return True if we successfully rolled back, false if we got to the point
|
||||
* of no return and so now need to abort the server to minimize damage.
|
||||
*/
|
||||
public void rollback(final Server server, final OnlineRegions or)
|
||||
public boolean rollback(final Server server, final RegionServerServices services)
|
||||
throws IOException {
|
||||
boolean result = true;
|
||||
FileSystem fs = this.parent.getFilesystem();
|
||||
ListIterator<JournalEntry> iterator =
|
||||
this.journal.listIterator(this.journal.size());
|
||||
// Iterate in reverse.
|
||||
while (iterator.hasPrevious()) {
|
||||
JournalEntry je = iterator.previous();
|
||||
switch(je) {
|
||||
|
@ -642,13 +677,19 @@ public class SplitTransaction {
|
|||
break;
|
||||
|
||||
case OFFLINED_PARENT:
|
||||
if (or != null) or.addToOnlineRegions(this.parent);
|
||||
if (services != null) services.addToOnlineRegions(this.parent);
|
||||
break;
|
||||
|
||||
case PONR:
|
||||
// We got to the point-of-no-return so we need to just abort. Return
|
||||
// immediately.
|
||||
return false;
|
||||
|
||||
default:
|
||||
throw new RuntimeException("Unhandled journal entry: " + je);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
HRegionInfo getFirstDaughter() {
|
||||
|
|
|
@ -113,7 +113,7 @@ public class TestCatalogTracker {
|
|||
@Test public void testThatIfMETAMovesWeAreNotified()
|
||||
throws IOException, InterruptedException, KeeperException {
|
||||
HConnection connection = Mockito.mock(HConnection.class);
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||
constructAndStartCatalogTracker(connection);
|
||||
try {
|
||||
RootLocationEditor.setRootLocation(this.watcher,
|
||||
new ServerName("example.com", 1234, System.currentTimeMillis()));
|
||||
|
|
|
@ -293,6 +293,14 @@ public class TestCatalogJanitor {
|
|||
assertFalse(janitor.cleanParent(parent, r));
|
||||
// Remove the reference file and try again.
|
||||
assertTrue(fs.delete(p, true));
|
||||
// We will fail!!! Because split b is empty, which is right... we should
|
||||
// not remove parent if daughters do not exist in fs.
|
||||
assertFalse(janitor.cleanParent(parent, r));
|
||||
// Put in place daughter dir for b... that should make it so parent gets
|
||||
// cleaned up.
|
||||
storedir = Store.getStoreHomedir(tabledir, splitb.getEncodedName(),
|
||||
htd.getColumnFamilies()[0].getName());
|
||||
assertTrue(fs.mkdirs(storedir));
|
||||
assertTrue(janitor.cleanParent(parent, r));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.client.Scan;
|
|||
import org.apache.hadoop.hbase.regionserver.wal.HLog;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.PairOfSameType;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
@ -84,6 +85,46 @@ public class TestSplitTransaction {
|
|||
this.fs.delete(this.testdir, true);
|
||||
}
|
||||
|
||||
@Test public void testFailAfterPONR() throws IOException, KeeperException {
|
||||
final int rowcount = TEST_UTIL.loadRegion(this.parent, CF);
|
||||
assertTrue(rowcount > 0);
|
||||
int parentRowCount = countRows(this.parent);
|
||||
assertEquals(rowcount, parentRowCount);
|
||||
|
||||
// Start transaction.
|
||||
SplitTransaction st = prepareGOOD_SPLIT_ROW();
|
||||
SplitTransaction spiedUponSt = spy(st);
|
||||
Mockito.doThrow(new MockedFailedDaughterOpen()).
|
||||
when(spiedUponSt).openDaughterRegion((Server)Mockito.anyObject(),
|
||||
(RegionServerServices)Mockito.anyObject(), (HRegion)Mockito.anyObject());
|
||||
|
||||
// Run the execute. Look at what it returns.
|
||||
boolean expectedException = false;
|
||||
Server mockServer = Mockito.mock(Server.class);
|
||||
when(mockServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
|
||||
try {
|
||||
spiedUponSt.execute(mockServer, null);
|
||||
} catch (IOException e) {
|
||||
if (e.getCause() != null &&
|
||||
e.getCause() instanceof MockedFailedDaughterOpen) {
|
||||
expectedException = true;
|
||||
}
|
||||
}
|
||||
assertTrue(expectedException);
|
||||
// Run rollback returns that we should restart.
|
||||
assertFalse(spiedUponSt.rollback(null, null));
|
||||
// Make sure that region a and region b are still in the filesystem, that
|
||||
// they have not been removed; this is supposed to be the case if we go
|
||||
// past point of no return.
|
||||
Path tableDir = this.parent.getRegionDir().getParent();
|
||||
Path daughterADir =
|
||||
new Path(tableDir, spiedUponSt.getFirstDaughter().getEncodedName());
|
||||
Path daughterBDir =
|
||||
new Path(tableDir, spiedUponSt.getSecondDaughter().getEncodedName());
|
||||
assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterADir));
|
||||
assertTrue(TEST_UTIL.getTestFileSystem().exists(daughterBDir));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test straight prepare works. Tries to split on {@link #GOOD_SPLIT_ROW}
|
||||
* @throws IOException
|
||||
|
@ -190,7 +231,7 @@ public class TestSplitTransaction {
|
|||
}
|
||||
assertTrue(expectedException);
|
||||
// Run rollback
|
||||
spiedUponSt.rollback(null, null);
|
||||
assertTrue(spiedUponSt.rollback(null, null));
|
||||
|
||||
// Assert I can scan parent.
|
||||
int parentRowCount2 = countRows(this.parent);
|
||||
|
@ -228,6 +269,7 @@ public class TestSplitTransaction {
|
|||
*/
|
||||
@SuppressWarnings("serial")
|
||||
private class MockedFailedDaughterCreation extends IOException {}
|
||||
private class MockedFailedDaughterOpen extends IOException {}
|
||||
|
||||
private int countRows(final HRegion r) throws IOException {
|
||||
int rowcount = 0;
|
||||
|
|
Loading…
Reference in New Issue