HBASE-4238 CatalogJanitor can clear a daughter that split before processing its parent

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1170291 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-09-13 18:36:54 +00:00
parent 7df33f9875
commit 3bda78ec61
5 changed files with 224 additions and 50 deletions

View File

@ -570,6 +570,8 @@ Release 0.90.5 - Unreleased
HBASE-4180 HBase should check the isSecurityEnabled flag before login HBASE-4180 HBase should check the isSecurityEnabled flag before login
HBASE-4325 Improve error message when using STARTROW for meta scans HBASE-4325 Improve error message when using STARTROW for meta scans
(Jonathan Hsieh) (Jonathan Hsieh)
HBASE-4238 CatalogJanitor can clear a daughter that split before
processing its parent
IMPROVEMENT IMPROVEMENT
HBASE-4205 Enhance HTable javadoc (Eric Charles) HBASE-4205 Enhance HTable javadoc (Eric Charles)

View File

@ -34,9 +34,9 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableExistsException; import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaEditor;
@ -47,7 +47,6 @@ import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.util.Writables;
@ -107,26 +106,7 @@ class CatalogJanitor extends Chore {
// Keep Map of found split parents. There are candidates for cleanup. // Keep Map of found split parents. There are candidates for cleanup.
// Use a comparator that has split parents come before its daughters. // Use a comparator that has split parents come before its daughters.
final Map<HRegionInfo, Result> splitParents = final Map<HRegionInfo, Result> splitParents =
new TreeMap<HRegionInfo, Result>(new Comparator<HRegionInfo> () { new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
@Override
public int compare(HRegionInfo left, HRegionInfo right) {
// This comparator differs from the one HRegionInfo in that it sorts
// parent before daughters.
if (left == null) return -1;
if (right == null) return 1;
// Same table name.
int result = Bytes.compareTo(left.getTableName(),
right.getTableName());
if (result != 0) return result;
// Compare start keys.
result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
if (result != 0) return result;
// Compare end keys.
result = Bytes.compareTo(left.getEndKey(), right.getEndKey());
if (result != 0) return -result; // Flip the result so parent comes first.
return result;
}
});
// This visitor collects split parents and counts rows in the .META. table // This visitor collects split parents and counts rows in the .META. table
MetaReader.Visitor visitor = new MetaReader.Visitor() { MetaReader.Visitor visitor = new MetaReader.Visitor() {
@Override @Override
@ -156,6 +136,31 @@ class CatalogJanitor extends Chore {
} }
} }
/**
* Compare HRegionInfos in a way that has split parents sort BEFORE their
* daughters.
*/
static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
@Override
public int compare(HRegionInfo left, HRegionInfo right) {
// This comparator differs from the one in HRegionInfo in that it sorts
// parent before daughters.
if (left == null) return -1;
if (right == null) return 1;
// Same table name.
int result = Bytes.compareTo(left.getTableName(),
right.getTableName());
if (result != 0) return result;
// Compare start keys.
result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
if (result != 0) return result;
// Compare end keys.
result = Bytes.compareTo(left.getEndKey(), right.getEndKey());
if (result != 0) return -result; // Flip the result so parent comes first.
return result;
}
}
/** /**
* Get HRegionInfo from passed Map of row values. * Get HRegionInfo from passed Map of row values.
* @param result Map to do lookup in. * @param result Map to do lookup in.
@ -192,7 +197,7 @@ class CatalogJanitor extends Chore {
checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER); checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER);
Pair<Boolean, Boolean> b = Pair<Boolean, Boolean> b =
checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER); checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER);
if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) { if (hasNoReferences(a) && hasNoReferences(b)) {
LOG.debug("Deleting region " + parent.getRegionNameAsString() + LOG.debug("Deleting region " + parent.getRegionNameAsString() +
" because daughter splits no longer hold references"); " because daughter splits no longer hold references");
// This latter regionOffline should not be necessary but is done for now // This latter regionOffline should not be necessary but is done for now
@ -211,7 +216,16 @@ class CatalogJanitor extends Chore {
return result; return result;
} }
/**
* @param p A pair where the first boolean says whether or not the daughter
* region directory exists in the filesystem and then the second boolean says
* whether the daughter has references to the parent.
* @return True if the passed <code>p</code> signifies no references.
*/
private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
return !p.getFirst() || !p.getSecond();
}
/** /**
* See if the passed daughter has references in the filesystem to the parent * See if the passed daughter has references in the filesystem to the parent
* and if not, remove the note of daughter region in the parent row: its * and if not, remove the note of daughter region in the parent row: its

View File

@ -308,6 +308,11 @@ public class ServerShutdownHandler extends EventHandler {
if (isDaughterMissing(catalogTracker, daughter)) { if (isDaughterMissing(catalogTracker, daughter)) {
LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString()); LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
MetaEditor.addDaughter(catalogTracker, daughter, null); MetaEditor.addDaughter(catalogTracker, daughter, null);
// TODO: Log WARN if the regiondir does not exist in the fs. If its not
// there then something wonky about the split -- things will keep going
// but could be missing references to parent region.
// And assign it. // And assign it.
assignmentManager.assign(daughter, true); assignmentManager.assign(daughter, true);
} else { } else {

View File

@ -288,17 +288,28 @@ public class SplitTransaction {
HRegion b = createDaughterRegion(this.hri_b, this.parent.rsServices); HRegion b = createDaughterRegion(this.hri_b, this.parent.rsServices);
// Edit parent in meta. Offlines parent region and adds splita and splitb. // Edit parent in meta. Offlines parent region and adds splita and splitb.
// TODO: This can 'fail' by timing out against .META. but the edits could
// be applied anyways over on the server. There is no way to tell for sure.
// We could try and get the edits again subsequent to their application
// whether we fail or not but that could fail too. We should probably move
// the PONR to here before the edits go in but could mean we'd abort the
// regionserver when we didn't need to; i.e. the edits did not make it in.
if (!testing) { if (!testing) {
MetaEditor.offlineParentInMeta(server.getCatalogTracker(), MetaEditor.offlineParentInMeta(server.getCatalogTracker(),
this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo()); this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo());
} }
// This is the point of no return. Adding subsequent edits to .META. as we // This is the point of no return. Adding subsequent edits to .META. as we
// do below when we do the daugther opens adding each to .META. can fail in // do below when we do the daughter opens adding each to .META. can fail in
// various interesting ways the most interesting of which is a timeout // various interesting ways the most interesting of which is a timeout
// BUT the edits all go through (See HBASE-3872). IF we reach the POWR // BUT the edits all go through (See HBASE-3872). IF we reach the PONR
// then subsequent failures need to crash out this regionserver; the // then subsequent failures need to crash out this regionserver; the
// server shutdown processing should be able to fix-up the incomplete split. // server shutdown processing should be able to fix-up the incomplete split.
// The offlined parent will have the daughters as extra columns. If
// we leave the daughter regions in place and do not remove them when we
// crash out, then they will have their references to the parent in place
// still and the server shutdown fixup of .META. will point to these
// regions.
this.journal.add(JournalEntry.PONR); this.journal.add(JournalEntry.PONR);
// Open daughters in parallel. // Open daughters in parallel.
DaughterOpener aOpener = new DaughterOpener(server, services, a); DaughterOpener aOpener = new DaughterOpener(server, services, a);
@ -684,7 +695,9 @@ public class SplitTransaction {
case PONR: case PONR:
// We got to the point-of-no-return so we need to just abort. Return // We got to the point-of-no-return so we need to just abort. Return
// immediately. // immediately. Do not clean up created daughter regions. They need
// to be in place so we don't delete the parent region mistakenly.
// See HBASE-3872.
return false; return false;
default: default:

View File

@ -29,6 +29,8 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -57,7 +59,6 @@ import org.junit.Test;
import org.mockito.Mockito; import org.mockito.Mockito;
public class TestCatalogJanitor { public class TestCatalogJanitor {
/** /**
* Pseudo server for below tests. * Pseudo server for below tests.
*/ */
@ -70,8 +71,7 @@ public class TestCatalogJanitor {
this.c = htu.getConfiguration(); this.c = htu.getConfiguration();
// Set hbase.rootdir into test dir. // Set hbase.rootdir into test dir.
FileSystem fs = FileSystem.get(this.c); FileSystem fs = FileSystem.get(this.c);
Path rootdir = Path rootdir = fs.makeQualified(new Path(this.c.get(HConstants.HBASE_DIR)));
fs.makeQualified(HBaseTestingUtility.getTestDir(HConstants.HBASE_DIR));
this.c.set(HConstants.HBASE_DIR, rootdir.toString()); this.c.set(HConstants.HBASE_DIR, rootdir.toString());
this.ct = Mockito.mock(CatalogTracker.class); this.ct = Mockito.mock(CatalogTracker.class);
HRegionInterface hri = Mockito.mock(HRegionInterface.class); HRegionInterface hri = Mockito.mock(HRegionInterface.class);
@ -211,9 +211,7 @@ public class TestCatalogJanitor {
@Override @Override
public HTableDescriptor get(String tablename) public HTableDescriptor get(String tablename)
throws TableExistsException, FileNotFoundException, IOException { throws TableExistsException, FileNotFoundException, IOException {
HTableDescriptor htd = new HTableDescriptor("table"); return createHTableDescriptor();
htd.addFamily(new HColumnDescriptor("family"));
return htd;
} }
@Override @Override
@ -255,12 +253,12 @@ public class TestCatalogJanitor {
@Test @Test
public void testCleanParent() throws IOException { public void testCleanParent() throws IOException {
HBaseTestingUtility htu = new HBaseTestingUtility(); HBaseTestingUtility htu = new HBaseTestingUtility();
setRootDirAndCleanIt(htu, "testCleanParent");
Server server = new MockServer(htu); Server server = new MockServer(htu);
MasterServices services = new MockMasterServices(server); MasterServices services = new MockMasterServices(server);
CatalogJanitor janitor = new CatalogJanitor(server, services); CatalogJanitor janitor = new CatalogJanitor(server, services);
// Create regions. // Create regions.
HTableDescriptor htd = new HTableDescriptor("table"); HTableDescriptor htd = createHTableDescriptor();
htd.addFamily(new HColumnDescriptor("family"));
HRegionInfo parent = HRegionInfo parent =
new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"), new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
Bytes.toBytes("eee")); Bytes.toBytes("eee"));
@ -272,12 +270,7 @@ public class TestCatalogJanitor {
Bytes.toBytes("eee")); Bytes.toBytes("eee"));
// Test that when both daughter regions are in place, that we do not // Test that when both daughter regions are in place, that we do not
// remove the parent. // remove the parent.
List<KeyValue> kvs = new ArrayList<KeyValue>(); Result r = createResult(parent, splita, splitb);
kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
HConstants.SPLITA_QUALIFIER, Writables.getBytes(splita)));
kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
HConstants.SPLITB_QUALIFIER, Writables.getBytes(splitb)));
Result r = new Result(kvs);
// Add a reference under splitA directory so we don't clear out the parent. // Add a reference under splitA directory so we don't clear out the parent.
Path rootdir = services.getMasterFileSystem().getRootDir(); Path rootdir = services.getMasterFileSystem().getRootDir();
Path tabledir = Path tabledir =
@ -293,14 +286,161 @@ public class TestCatalogJanitor {
assertFalse(janitor.cleanParent(parent, r)); assertFalse(janitor.cleanParent(parent, r));
// Remove the reference file and try again. // Remove the reference file and try again.
assertTrue(fs.delete(p, true)); assertTrue(fs.delete(p, true));
// We will fail!!! Because split b is empty, which is right... we should
// not remove parent if daughters do not exist in fs.
assertFalse(janitor.cleanParent(parent, r));
// Put in place daughter dir for b... that should make it so parent gets
// cleaned up.
storedir = Store.getStoreHomedir(tabledir, splitb.getEncodedName(),
htd.getColumnFamilies()[0].getName());
assertTrue(fs.mkdirs(storedir));
assertTrue(janitor.cleanParent(parent, r)); assertTrue(janitor.cleanParent(parent, r));
} }
}
/**
* Make sure parent gets cleaned up even if daughter is cleaned up before it.
* @throws IOException
* @throws InterruptedException
*/
@Test
public void testParentCleanedEvenIfDaughterGoneFirst()
throws IOException, InterruptedException {
HBaseTestingUtility htu = new HBaseTestingUtility();
setRootDirAndCleanIt(htu, "testParentCleanedEvenIfDaughterGoneFirst");
Server server = new MockServer(htu);
MasterServices services = new MockMasterServices(server);
CatalogJanitor janitor = new CatalogJanitor(server, services);
final HTableDescriptor htd = createHTableDescriptor();
// Create regions: aaa->eee, aaa->ccc, aaa->bbb, bbb->ccc, etc.
// Parent
HRegionInfo parent = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
Bytes.toBytes("eee"));
// Sleep a second else the encoded name on these regions comes out
// same for all with same start key and made in same second.
Thread.sleep(1001);
// Daughter a
HRegionInfo splita = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
Bytes.toBytes("ccc"));
Thread.sleep(1001);
// Make daughters of daughter a; splitaa and splitab.
HRegionInfo splitaa = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
Bytes.toBytes("bbb"));
HRegionInfo splitab = new HRegionInfo(htd.getName(), Bytes.toBytes("bbb"),
Bytes.toBytes("ccc"));
// Daughter b
HRegionInfo splitb = new HRegionInfo(htd.getName(), Bytes.toBytes("ccc"),
Bytes.toBytes("eee"));
Thread.sleep(1001);
// Make Daughters of daughterb; splitba and splitbb.
HRegionInfo splitba = new HRegionInfo(htd.getName(), Bytes.toBytes("ccc"),
Bytes.toBytes("ddd"));
HRegionInfo splitbb = new HRegionInfo(htd.getName(), Bytes.toBytes("ddd"),
Bytes.toBytes("eee"));
// First test that our Comparator works right up in CatalogJanitor.
// Just for kicks.
SortedMap<HRegionInfo, Result> regions =
new TreeMap<HRegionInfo, Result>(new CatalogJanitor.SplitParentFirstComparator());
// Now make sure that this regions map sorts as we expect it to.
regions.put(parent, createResult(parent, splita, splitb));
regions.put(splitb, createResult(splitb, splitba, splitbb));
regions.put(splita, createResult(splita, splitaa, splitab));
// Assert its properly sorted.
int index = 0;
for (Map.Entry<HRegionInfo, Result> e: regions.entrySet()) {
if (index == 0) {
assertTrue(e.getKey().getEncodedName().equals(parent.getEncodedName()));
} else if (index == 1) {
assertTrue(e.getKey().getEncodedName().equals(splita.getEncodedName()));
} else if (index == 2) {
assertTrue(e.getKey().getEncodedName().equals(splitb.getEncodedName()));
}
index++;
}
// Now play around with the cleanParent function. Create a ref from splita
// up to the parent.
Path splitaRef =
createReferences(services, htd, parent, splita, Bytes.toBytes("ccc"), false);
// Make sure actual super parent sticks around because splita has a ref.
assertFalse(janitor.cleanParent(parent, regions.get(parent)));
//splitba, and split bb, do not have dirs in fs. That means that if
// we test splitb, it should get cleaned up.
assertTrue(janitor.cleanParent(splitb, regions.get(splitb)));
// Now remove ref from splita to parent... so parent can be let go and so
// the daughter splita can be split (can't split if still references).
// BUT make the timing such that the daughter gets cleaned up before we
// can get a chance to let go of the parent.
FileSystem fs = FileSystem.get(htu.getConfiguration());
assertTrue(fs.delete(splitaRef, true));
// Create the refs from daughters of splita.
Path splitaaRef =
createReferences(services, htd, splita, splitaa, Bytes.toBytes("bbb"), false);
Path splitabRef =
createReferences(services, htd, splita, splitab, Bytes.toBytes("bbb"), true);
// Test splita. It should stick around because references from splitab, etc.
assertFalse(janitor.cleanParent(splita, regions.get(splita)));
// Now clean up parent daughter first. Remove references from its daughters.
assertTrue(fs.delete(splitaaRef, true));
assertTrue(fs.delete(splitabRef, true));
assertTrue(janitor.cleanParent(splita, regions.get(splita)));
// Super parent should get cleaned up now both splita and splitb are gone.
assertTrue(janitor.cleanParent(parent, regions.get(parent)));
}
private String setRootDirAndCleanIt(final HBaseTestingUtility htu,
final String subdir)
throws IOException {
Path testdir = HBaseTestingUtility.getTestDir(subdir);
FileSystem fs = FileSystem.get(htu.getConfiguration());
if (fs.exists(testdir)) assertTrue(fs.delete(testdir, true));
htu.getConfiguration().set(HConstants.HBASE_DIR, testdir.toString());
return htu.getConfiguration().get(HConstants.HBASE_DIR);
}
/**
* @param services Master services instance.
* @param htd
* @param parent
* @param daughter
* @param midkey
* @param top True if we are to write a 'top' reference.
* @return Path to reference we created.
* @throws IOException
*/
private Path createReferences(final MasterServices services,
final HTableDescriptor htd, final HRegionInfo parent,
final HRegionInfo daughter, final byte [] midkey, final boolean top)
throws IOException {
Path rootdir = services.getMasterFileSystem().getRootDir();
Path tabledir = HTableDescriptor.getTableDir(rootdir, parent.getTableName());
Path storedir = Store.getStoreHomedir(tabledir, daughter.getEncodedName(),
htd.getColumnFamilies()[0].getName());
Reference ref = new Reference(midkey,
top? Reference.Range.top: Reference.Range.bottom);
long now = System.currentTimeMillis();
// Reference name has this format: StoreFile#REF_NAME_PARSER
Path p = new Path(storedir, Long.toString(now) + "." + parent.getEncodedName());
FileSystem fs = services.getMasterFileSystem().getFileSystem();
ref.write(fs, p);
return p;
}
private Result createResult(final HRegionInfo parent, final HRegionInfo a,
final HRegionInfo b)
throws IOException {
List<KeyValue> kvs = new ArrayList<KeyValue>();
kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
HConstants.SPLITA_QUALIFIER, Writables.getBytes(a)));
kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
HConstants.SPLITB_QUALIFIER, Writables.getBytes(b)));
return new Result(kvs);
}
private HTableDescriptor createHTableDescriptor() {
HTableDescriptor htd = new HTableDescriptor("t");
htd.addFamily(new HColumnDescriptor("f"));
return htd;
}
}