From 3bda78ec61b25f49f676e1418472694aa060e1f8 Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Tue, 13 Sep 2011 18:36:54 +0000
Subject: [PATCH] HBASE-4238 CatalogJanitor can clear a daughter that split before processing its parent

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1170291 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                       |   2 +
 .../hadoop/hbase/master/CatalogJanitor.java       |  62 +++---
 .../master/handler/ServerShutdownHandler.java     |   5 +
 .../hbase/regionserver/SplitTransaction.java      |  19 +-
 .../hbase/master/TestCatalogJanitor.java          | 186 +++++++++++++++---
 5 files changed, 224 insertions(+), 50 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 378896a8bb5..798f1a15c5f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -570,6 +570,8 @@ Release 0.90.5 - Unreleased
    HBASE-4180  HBase should check the isSecurityEnabled flag before login
    HBASE-4325  Improve error message when using STARTROW for meta scans
                (Jonathan Hsieh)
+   HBASE-4238  CatalogJanitor can clear a daughter that split before
+               processing its parent
 
   IMPROVEMENT
    HBASE-4205  Enhance HTable javadoc (Eric Charles)

diff --git a/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
index b53e9a03508..da9332cbd7d 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
@@ -34,9 +34,9 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hbase.Chore;
 import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.TableExistsException;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
@@ -47,7 +47,6 @@ import org.apache.hadoop.hbase.regionserver.Store;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;
 
 
@@ -107,26 +106,7 @@ class CatalogJanitor extends Chore {
     // Keep Map of found split parents.  There are candidates for cleanup.
     // Use a comparator that has split parents come before its daughters.
     final Map<HRegionInfo, Result> splitParents =
-      new TreeMap<HRegionInfo, Result>(new Comparator<HRegionInfo>() {
-        @Override
-        public int compare(HRegionInfo left, HRegionInfo right) {
-          // This comparator differs from the one HRegionInfo in that it sorts
-          // parent before daughters.
-          if (left == null) return -1;
-          if (right == null) return 1;
-          // Same table name.
-          int result = Bytes.compareTo(left.getTableName(),
-            right.getTableName());
-          if (result != 0) return result;
-          // Compare start keys.
-          result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
-          if (result != 0) return result;
-          // Compare end keys.
-          result = Bytes.compareTo(left.getEndKey(), right.getEndKey());
-          if (result != 0) return -result; // Flip the result so parent comes first.
-          return result;
-        }
-      });
+      new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
     // This visitor collects split parents and counts rows in the .META. table
     MetaReader.Visitor visitor = new MetaReader.Visitor() {
       @Override
@@ -156,6 +136,31 @@
     }
   }
 
+  /**
+   * Compare HRegionInfos in a way that has split parents sort BEFORE their
+   * daughters.
+   */
+  static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
+    @Override
+    public int compare(HRegionInfo left, HRegionInfo right) {
+      // This comparator differs from the one in HRegionInfo in that it sorts
+      // parent before daughters.
+      if (left == null) return -1;
+      if (right == null) return 1;
+      // Same table name.
+      int result = Bytes.compareTo(left.getTableName(),
+        right.getTableName());
+      if (result != 0) return result;
+      // Compare start keys.
+      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
+      if (result != 0) return result;
+      // Compare end keys.
+      result = Bytes.compareTo(left.getEndKey(), right.getEndKey());
+      if (result != 0) return -result; // Flip the result so parent comes first.
+      return result;
+    }
+  }
+
   /**
    * Get HRegionInfo from passed Map of row values.
    * @param result Map to do lookup in.
@@ -192,7 +197,7 @@
       checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER);
     Pair<Boolean, Boolean> b =
       checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER);
-    if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) {
+    if (hasNoReferences(a) && hasNoReferences(b)) {
       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
         " because daughter splits no longer hold references");
       // This latter regionOffline should not be necessary but is done for now
@@ -211,7 +216,16 @@
     return result;
   }
 
-
+  /**
+   * @param p A pair where the first boolean says whether or not the daughter
+   * region directory exists in the filesystem, and the second boolean says
+   * whether the daughter still has references to the parent.
+   * @return True if the passed p signifies no references.
+   */
+  private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
+    return !p.getFirst() || !p.getSecond();
+  }
+
   /**
    * See if the passed daughter has references in the filesystem to the parent
    * and if not, remove the note of daughter region in the parent row: its
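
The hasNoReferences() change above is the core of the fix: previously cleanParent() deleted the parent only when BOTH daughter directories existed in the filesystem and held no references, so a daughter that had itself already split and been cleaned up kept its parent around forever. The following standalone sketch (illustration only, not part of the patch; the class name is invented) spells out the intended truth table of the new predicate, using the same Pair semantics as checkDaughter(): first = daughter directory exists, second = daughter still holds references to the parent.

    import org.apache.hadoop.hbase.util.Pair;

    public class HasNoReferencesSketch {
      // Mirrors CatalogJanitor.hasNoReferences(): a daughter no longer pins its
      // parent if its directory is gone OR the directory holds no references.
      static boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
        return !p.getFirst() || !p.getSecond();
      }

      public static void main(String[] args) {
        // Daughter already cleaned up (no directory): parent may be deleted.
        System.out.println(hasNoReferences(new Pair<Boolean, Boolean>(false, false))); // true
        // Daughter present, references already compacted away: parent may be deleted.
        System.out.println(hasNoReferences(new Pair<Boolean, Boolean>(true, false)));  // true
        // Daughter still carries references to the parent: parent must be kept.
        System.out.println(hasNoReferences(new Pair<Boolean, Boolean>(true, true)));   // false
      }
    }
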
diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
index 742aea4af9f..6ac64088d7b 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
@@ -308,6 +308,11 @@ public class ServerShutdownHandler extends EventHandler {
     if (isDaughterMissing(catalogTracker, daughter)) {
       LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
       MetaEditor.addDaughter(catalogTracker, daughter, null);
+
+      // TODO: Log WARN if the regiondir does not exist in the fs.  If it's not
+      // there, then something is wonky about the split -- things will keep going
+      // but we could be missing references to the parent region.
+
       // And assign it.
       assignmentManager.assign(daughter, true);
     } else {

diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
index abafe5ec4a8..250d28822cc 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
@@ -288,17 +288,28 @@ public class SplitTransaction {
     HRegion b = createDaughterRegion(this.hri_b, this.parent.rsServices);
 
     // Edit parent in meta.  Offlines parent region and adds splita and splitb.
+    // TODO: This can 'fail' by timing out against .META., but the edits could
+    // be applied anyway over on the server.  There is no way to tell for sure.
+    // We could try and get the edits again subsequent to their application,
+    // whether we fail or not, but that could fail too.  We should probably move
+    // the PONR to here, before the edits go in, but that could mean we'd abort
+    // the regionserver when we didn't need to; i.e. the edits did not make it in.
     if (!testing) {
       MetaEditor.offlineParentInMeta(server.getCatalogTracker(),
         this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo());
     }
 
     // This is the point of no return.  Adding subsequent edits to .META. as we
-    // do below when we do the daugther opens adding each to .META. can fail in
+    // do below when we do the daughter opens adding each to .META. can fail in
     // various interesting ways the most interesting of which is a timeout
-    // BUT the edits all go through (See HBASE-3872). IF we reach the POWR
+    // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
     // then subsequent failures need to crash out this regionserver; the
     // server shutdown processing should be able to fix-up the incomplete split.
+    // The offlined parent will have the daughters as extra columns.  If
+    // we leave the daughter regions in place and do not remove them when we
+    // crash out, then they will still have their references to the parent in
+    // place and the server shutdown fixup of .META. will point to these
+    // regions.
     this.journal.add(JournalEntry.PONR);
     // Open daughters in parallel.
     DaughterOpener aOpener = new DaughterOpener(server, services, a);
@@ -684,7 +695,9 @@
       case PONR:
         // We got to the point-of-no-return so we need to just abort. Return
-        // immediately.
+        // immediately.  Do not clean up created daughter regions.  They need
+        // to be in place so we don't delete the parent region mistakenly.
+        // See HBASE-3872.
         return false;
 
       default:
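
Both the SplitTransaction comments above and CatalogJanitor's checkDaughter() hinge on where a daughter's reference files to its parent live on disk. As orientation for the createReferences() helper in the test below, here is a minimal sketch (illustration only, not part of the patch; the class and method names are invented) of how such a reference path is composed: a file named <storefile-id>.<parent-encoded-name> under the daughter's store directory. As long as one such file exists, the parent row and directory must be retained.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.regionserver.Store;

    public class DaughterReferenceLayoutSketch {
      /**
       * @return Where a daughter's reference file back to its split parent would
       * live, mirroring what createReferences() in TestCatalogJanitor writes.
       */
      static Path referencePath(final Path rootdir, final HTableDescriptor htd,
          final HRegionInfo parent, final HRegionInfo daughter,
          final long storefileId) {
        Path tabledir = HTableDescriptor.getTableDir(rootdir, parent.getTableName());
        Path storedir = Store.getStoreHomedir(tabledir, daughter.getEncodedName(),
          htd.getColumnFamilies()[0].getName());
        // Reference file name format: <referred storefile id>.<parent encoded name>
        return new Path(storedir,
          Long.toString(storefileId) + "." + parent.getEncodedName());
      }
    }
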
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
index 78e7d624fe2..1c6219b5d90 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
@@ -29,6 +29,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -57,7 +59,6 @@ import org.junit.Test;
 import org.mockito.Mockito;
 
 public class TestCatalogJanitor {
-
   /**
    * Pseudo server for below tests.
    */
@@ -70,8 +71,7 @@ public class TestCatalogJanitor {
       this.c = htu.getConfiguration();
       // Set hbase.rootdir into test dir.
       FileSystem fs = FileSystem.get(this.c);
-      Path rootdir =
-        fs.makeQualified(HBaseTestingUtility.getTestDir(HConstants.HBASE_DIR));
+      Path rootdir = fs.makeQualified(new Path(this.c.get(HConstants.HBASE_DIR)));
       this.c.set(HConstants.HBASE_DIR, rootdir.toString());
       this.ct = Mockito.mock(CatalogTracker.class);
       HRegionInterface hri = Mockito.mock(HRegionInterface.class);
@@ -211,9 +211,7 @@
       @Override
       public HTableDescriptor get(String tablename)
       throws TableExistsException, FileNotFoundException, IOException {
-        HTableDescriptor htd = new HTableDescriptor("table");
-        htd.addFamily(new HColumnDescriptor("family"));
-        return htd;
+        return createHTableDescriptor();
       }
 
       @Override
@@ -255,12 +253,12 @@
   @Test
   public void testCleanParent() throws IOException {
     HBaseTestingUtility htu = new HBaseTestingUtility();
+    setRootDirAndCleanIt(htu, "testCleanParent");
     Server server = new MockServer(htu);
     MasterServices services = new MockMasterServices(server);
     CatalogJanitor janitor = new CatalogJanitor(server, services);
     // Create regions.
-    HTableDescriptor htd = new HTableDescriptor("table");
-    htd.addFamily(new HColumnDescriptor("family"));
+    HTableDescriptor htd = createHTableDescriptor();
     HRegionInfo parent =
       new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
           Bytes.toBytes("eee"));
@@ -272,12 +270,7 @@
           Bytes.toBytes("eee"));
     // Test that when both daughter regions are in place, that we do not
     // remove the parent.
-    List<KeyValue> kvs = new ArrayList<KeyValue>();
-    kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
-      HConstants.SPLITA_QUALIFIER, Writables.getBytes(splita)));
-    kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
-      HConstants.SPLITB_QUALIFIER, Writables.getBytes(splitb)));
-    Result r = new Result(kvs);
+    Result r = createResult(parent, splita, splitb);
     // Add a reference under splitA directory so we don't clear out the parent.
     Path rootdir = services.getMasterFileSystem().getRootDir();
     Path tabledir =
@@ -293,14 +286,161 @@
     assertFalse(janitor.cleanParent(parent, r));
     // Remove the reference file and try again.
     assertTrue(fs.delete(p, true));
-    // We will fail!!! Because split b is empty, which is right... we should
-    // not remove parent if daughters do not exist in fs.
-    assertFalse(janitor.cleanParent(parent, r));
-    // Put in place daughter dir for b... that should make it so parent gets
-    // cleaned up.
-    storedir = Store.getStoreHomedir(tabledir, splitb.getEncodedName(),
-      htd.getColumnFamilies()[0].getName());
-    assertTrue(fs.mkdirs(storedir));
     assertTrue(janitor.cleanParent(parent, r));
   }
-}
+
+  /**
+   * Make sure parent gets cleaned up even if daughter is cleaned up before it.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Test
+  public void testParentCleanedEvenIfDaughterGoneFirst()
+  throws IOException, InterruptedException {
+    HBaseTestingUtility htu = new HBaseTestingUtility();
+    setRootDirAndCleanIt(htu, "testParentCleanedEvenIfDaughterGoneFirst");
+    Server server = new MockServer(htu);
+    MasterServices services = new MockMasterServices(server);
+    CatalogJanitor janitor = new CatalogJanitor(server, services);
+    final HTableDescriptor htd = createHTableDescriptor();
+
+    // Create regions: aaa->eee, aaa->ccc, aaa->bbb, bbb->ccc, etc.
+
+    // Parent
+    HRegionInfo parent = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
+      Bytes.toBytes("eee"));
+    // Sleep a second, else the encoded name on these regions comes out the
+    // same for all regions with the same start key made in the same second.
+    Thread.sleep(1001);
+
+    // Daughter a
+    HRegionInfo splita = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
+      Bytes.toBytes("ccc"));
+    Thread.sleep(1001);
+    // Make daughters of daughter a; splitaa and splitab.
+    HRegionInfo splitaa = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
+      Bytes.toBytes("bbb"));
+    HRegionInfo splitab = new HRegionInfo(htd.getName(), Bytes.toBytes("bbb"),
+      Bytes.toBytes("ccc"));
+
+    // Daughter b
+    HRegionInfo splitb = new HRegionInfo(htd.getName(), Bytes.toBytes("ccc"),
+      Bytes.toBytes("eee"));
+    Thread.sleep(1001);
+    // Make daughters of daughter b; splitba and splitbb.
+    HRegionInfo splitba = new HRegionInfo(htd.getName(), Bytes.toBytes("ccc"),
+      Bytes.toBytes("ddd"));
+    HRegionInfo splitbb = new HRegionInfo(htd.getName(), Bytes.toBytes("ddd"),
+      Bytes.toBytes("eee"));
+
+    // First test that our Comparator works right up in CatalogJanitor.
+    // Just for kicks.
+    SortedMap<HRegionInfo, Result> regions =
+      new TreeMap<HRegionInfo, Result>(new CatalogJanitor.SplitParentFirstComparator());
+    // Now make sure that this regions map sorts as we expect it to.
+    regions.put(parent, createResult(parent, splita, splitb));
+    regions.put(splitb, createResult(splitb, splitba, splitbb));
+    regions.put(splita, createResult(splita, splitaa, splitab));
+    // Assert it's properly sorted.
+    int index = 0;
+    for (Map.Entry<HRegionInfo, Result> e: regions.entrySet()) {
+      if (index == 0) {
+        assertTrue(e.getKey().getEncodedName().equals(parent.getEncodedName()));
+      } else if (index == 1) {
+        assertTrue(e.getKey().getEncodedName().equals(splita.getEncodedName()));
+      } else if (index == 2) {
+        assertTrue(e.getKey().getEncodedName().equals(splitb.getEncodedName()));
+      }
+      index++;
+    }
+
+    // Now play around with the cleanParent function.  Create a ref from splita
+    // up to the parent.
+    Path splitaRef =
+      createReferences(services, htd, parent, splita, Bytes.toBytes("ccc"), false);
+    // Make sure actual super parent sticks around because splita has a ref.
+    assertFalse(janitor.cleanParent(parent, regions.get(parent)));
+
+    // splitba and splitbb do not have dirs in the fs.  That means that if
+    // we test splitb, it should get cleaned up.
+    assertTrue(janitor.cleanParent(splitb, regions.get(splitb)));
+
+    // Now remove ref from splita to parent... so parent can be let go and so
+    // the daughter splita can be split (can't split if it still has references).
+    // BUT make the timing such that the daughter gets cleaned up before we
+    // can get a chance to let go of the parent.
+    FileSystem fs = FileSystem.get(htu.getConfiguration());
+    assertTrue(fs.delete(splitaRef, true));
+    // Create the refs from daughters of splita.
+    Path splitaaRef =
+      createReferences(services, htd, splita, splitaa, Bytes.toBytes("bbb"), false);
+    Path splitabRef =
+      createReferences(services, htd, splita, splitab, Bytes.toBytes("bbb"), true);
+
+    // Test splita.  It should stick around because of references from splitab, etc.
+    assertFalse(janitor.cleanParent(splita, regions.get(splita)));
+
+    // Now clean up the daughter splita first.  Remove references from its daughters.
+    assertTrue(fs.delete(splitaaRef, true));
+    assertTrue(fs.delete(splitabRef, true));
+    assertTrue(janitor.cleanParent(splita, regions.get(splita)));
+
+    // Super parent should get cleaned up now both splita and splitb are gone.
+    assertTrue(janitor.cleanParent(parent, regions.get(parent)));
+  }
+
+  private String setRootDirAndCleanIt(final HBaseTestingUtility htu,
+      final String subdir)
+  throws IOException {
+    Path testdir = HBaseTestingUtility.getTestDir(subdir);
+    FileSystem fs = FileSystem.get(htu.getConfiguration());
+    if (fs.exists(testdir)) assertTrue(fs.delete(testdir, true));
+    htu.getConfiguration().set(HConstants.HBASE_DIR, testdir.toString());
+    return htu.getConfiguration().get(HConstants.HBASE_DIR);
+  }
+
+  /**
+   * @param services Master services instance.
+   * @param htd Table descriptor for the regions' table.
+   * @param parent Parent region the reference points back to.
+   * @param daughter Daughter region under whose store the reference is written.
+   * @param midkey Midkey to use in the reference.
+   * @param top True if we are to write a 'top' reference.
+   * @return Path to reference we created.
+   * @throws IOException
+   */
+  private Path createReferences(final MasterServices services,
+      final HTableDescriptor htd, final HRegionInfo parent,
+      final HRegionInfo daughter, final byte [] midkey, final boolean top)
+  throws IOException {
+    Path rootdir = services.getMasterFileSystem().getRootDir();
+    Path tabledir = HTableDescriptor.getTableDir(rootdir, parent.getTableName());
+    Path storedir = Store.getStoreHomedir(tabledir, daughter.getEncodedName(),
+      htd.getColumnFamilies()[0].getName());
+    Reference ref = new Reference(midkey,
+      top? Reference.Range.top: Reference.Range.bottom);
+    long now = System.currentTimeMillis();
+    // Reference name has this format: StoreFile#REF_NAME_PARSER
+    Path p = new Path(storedir, Long.toString(now) + "." + parent.getEncodedName());
+    FileSystem fs = services.getMasterFileSystem().getFileSystem();
+    ref.write(fs, p);
+    return p;
+  }
+
+  private Result createResult(final HRegionInfo parent, final HRegionInfo a,
+      final HRegionInfo b)
+  throws IOException {
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+    kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
+      HConstants.SPLITA_QUALIFIER, Writables.getBytes(a)));
+    kvs.add(new KeyValue(parent.getRegionName(), HConstants.CATALOG_FAMILY,
+      HConstants.SPLITB_QUALIFIER, Writables.getBytes(b)));
+    return new Result(kvs);
+  }
+
+  private HTableDescriptor createHTableDescriptor() {
+    HTableDescriptor htd = new HTableDescriptor("t");
+    htd.addFamily(new HColumnDescriptor("f"));
+    return htd;
+  }
+}
\ No newline at end of file
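
For completeness, here is a condensed, standalone rendering of the ordering that testParentCleanedEvenIfDaughterGoneFirst asserts above (illustration only, not part of the patch; the class name is invented, and it would have to live in the org.apache.hadoop.hbase.master package because CatalogJanitor and its comparator are package-private): with SplitParentFirstComparator, a split parent always sorts ahead of its daughters, so a CatalogJanitor scan considers the parent before either daughter.

    package org.apache.hadoop.hbase.master;

    import java.util.SortedSet;
    import java.util.TreeSet;

    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.util.Bytes;

    public class SplitParentFirstOrderingSketch {
      public static void main(String[] args) {
        byte [] table = Bytes.toBytes("t");
        HRegionInfo parent = new HRegionInfo(table, Bytes.toBytes("aaa"), Bytes.toBytes("eee"));
        HRegionInfo daughterA = new HRegionInfo(table, Bytes.toBytes("aaa"), Bytes.toBytes("ccc"));
        HRegionInfo daughterB = new HRegionInfo(table, Bytes.toBytes("ccc"), Bytes.toBytes("eee"));
        SortedSet<HRegionInfo> regions =
          new TreeSet<HRegionInfo>(new CatalogJanitor.SplitParentFirstComparator());
        // Insertion order does not matter; iteration order is parent, daughterA, daughterB.
        regions.add(daughterB);
        regions.add(daughterA);
        regions.add(parent);
        for (HRegionInfo hri: regions) {
          System.out.println(hri.getRegionNameAsString());
        }
      }
    }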