diff --git a/CHANGES.txt b/CHANGES.txt
index 8b618250cba..cc1741acc59 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -272,6 +272,9 @@ Release 0.91.0 - Unreleased
                shouldn't join
    HBASE-4395  EnableTableHandler races with itself
    HBASE-4414  Region splits by size not being triggered
+   HBASE-4322  [hbck] Update checkIntegrity/checkRegionChain
+               to present more accurate region split problem
+               (Jon Hsieh)
 
   IMPROVEMENTS
    HBASE-3290  Max Compaction Size (Nicolas Spiegelberg via Stack)
diff --git a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index a152bcbf3fc..f2ee12c00b7 100644
--- a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -20,11 +20,20 @@
 package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.atomic.AtomicInteger;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -55,14 +64,14 @@ import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
+import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
 
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
-
-import static org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
+import com.google.common.collect.Multimap;
 
 /**
  * Check consistency among the in-memory states of the master and the
@@ -544,7 +553,6 @@ public class HBaseFsck {
       modTInfo.addServer(server);
     }
 
-    //modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey());
     modTInfo.addRegionInfo(hbi);
 
     tablesInfo.put(tableName, modTInfo);
@@ -563,15 +571,36 @@ public class HBaseFsck {
   private class TInfo {
     String tableName;
     TreeSet<ServerName> deployedOn;
-    List<HbckInfo> regions = new ArrayList<HbckInfo>();
+
+    List<HbckInfo> backwards = new ArrayList<HbckInfo>();
+    RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
 
     TInfo(String name) {
       this.tableName = name;
       deployedOn = new TreeSet<ServerName>();
     }
 
-    public void addRegionInfo (HbckInfo r) {
-      regions.add(r);
+    public void addRegionInfo(HbckInfo hir) {
+      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
+        // end key is the absolute end key; just add it.
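+        // (an empty end key sorts before any start key and would falsely trip the cycle check below)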
+        sc.add(hir);
+        return;
+      }
+
+      // if not the absolute end key, check for a cycle
+      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
+        errors.reportError(
+            ERROR_CODE.REGION_CYCLE,
+            String.format("The endkey for this region comes before the "
+                + "startkey, startkey=%s, endkey=%s",
+                Bytes.toStringBinary(hir.getStartKey()),
+                Bytes.toStringBinary(hir.getEndKey())), this, hir);
+        backwards.add(hir);
+        return;
+      }
+
+      // main case, add to the split calculator
+      sc.add(hir);
     }
 
     public void addServer(ServerName server) {
@@ -583,7 +612,7 @@ public class HBaseFsck {
     }
 
     public int getNumRegions() {
-      return regions.size();
+      return sc.getStarts().size();
     }
 
     /**
@@ -592,66 +621,59 @@ public class HBaseFsck {
      * @return false if there are errors
      */
     public boolean checkRegionChain() {
-      Collections.sort(regions);
-      HbckInfo last = null;
+      int originalErrorsCount = errors.getErrorList().size();
+      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
+      SortedSet<byte[]> splits = sc.getSplits();
 
-      for (HbckInfo r : regions) {
-        if (last == null) {
-          // This is the first region, check that the start key is empty
-          if (! Bytes.equals(r.metaEntry.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
+      byte[] prevKey = null;
+      for (byte[] key: splits) {
+        Collection<HbckInfo> ranges = regions.get(key);
+        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
+          for (HbckInfo rng : ranges) {
             errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
                 "First region should start with an empty key.",
-                this, r);
+                this, rng);
           }
-        } else {
-
-          // Check if endKey < startKey
-          // Previous implementation of this code checked for a cycle in the
-          // region chain.  A cycle would imply that the endKey comes before
-          // the startKey (i.e. endKey < startKey).
-          if (! Bytes.equals(r.metaEntry.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)) {
-            // continue with this check if this is not the last region
-            int cmpRegionKeys = Bytes.compareTo(r.metaEntry.getStartKey(),
-                r.metaEntry.getEndKey());
-            if (cmpRegionKeys > 0) {
-              errors.reportError(ERROR_CODE.REGION_CYCLE,
-                  String.format("The endkey for this region comes before the "
-                      + "startkey, startkey=%s, endkey=%s",
-                      Bytes.toStringBinary(r.metaEntry.getStartKey()),
-                      Bytes.toStringBinary(r.metaEntry.getEndKey())),
-                  this, r, last);
-            }
-          }
-
-          // Check if the startkeys are different
-          if (Bytes.equals(r.metaEntry.getStartKey(), last.metaEntry.getStartKey())) {
-            errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
-                "Two regions have the same startkey: "
-                + Bytes.toStringBinary(r.metaEntry.getStartKey()),
-                this, r, last);
-          } else {
-            // Check that the startkey is the same as the previous end key
-            int cmp = Bytes.compareTo(r.metaEntry.getStartKey(),
-                last.metaEntry.getEndKey());
-            if (cmp > 0) {
-              // hole
-              errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN,
-                  "There is a hole in the region chain.",
-                  this, r, last);
-            } else if (cmp < 0) {
-              // overlap
-              errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
-                  "There is an overlap in the region chain.",
-                  this, r, last);
-            }
-          }
-        }
-
-        last = r;
+        }
+
+        // Check if the startkeys are different
+        if (ranges.size() > 1) {
+          ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
+          // this is O(n^2) but it shouldn't happen often
+          for (HbckInfo r1 : ranges) {
+            subRange.remove(r1);
+            for (HbckInfo r2 : subRange) {
+              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) {
+                // dup start key
+                errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
+                    "Multiple regions have the same startkey: "
+                    + Bytes.toStringBinary(key), this, r1);
+                errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
+                    "Multiple regions have the same startkey: "
+                    + Bytes.toStringBinary(key), this, r2);
+              } else {
+                // overlap
+                errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+                    "There is an overlap in the region chain.",
+                    this, r1);
+              }
+            }
+          }
+        }
+
+        if (ranges.size() == 0) {
+          byte[] holeStopKey = sc.getSplits().higher(key);
+          // if the higher key is null we reached the top.
+          if (holeStopKey != null) {
+            // hole
+            errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN,
+                "There is a hole in the region chain between "
+                + Bytes.toStringBinary(key) + " and "
+                + Bytes.toStringBinary(holeStopKey));
+          }
+        }
+        prevKey = key;
       }
-
       return errors.getErrorList().size() == originalErrorsCount;
     }
 
@@ -833,7 +855,7 @@ public class HBaseFsck {
 
   /**
    * Stores the entries scanned from META
    */
-  private static class MetaEntry extends HRegionInfo {
+  static class MetaEntry extends HRegionInfo {
     ServerName regionServer;   // server hosting this region
     long modTime;              // timestamp of most recent modification metadata
@@ -847,7 +869,7 @@ public class HBaseFsck {
   /**
    * Maintain information about a particular region.
    */
-  static class HbckInfo implements Comparable {
+  static class HbckInfo implements KeyRange {
     boolean onlyEdits = false;
     MetaEntry metaEntry = null;
     FileStatus foundRegionDir = null;
@@ -872,16 +894,53 @@ public class HBaseFsck {
     }
 
     @Override
-    public int compareTo(Object o) {
-      HbckInfo other = (HbckInfo) o;
-      int startComparison = Bytes.compareTo(this.metaEntry.getStartKey(), other.metaEntry.getStartKey());
-      if (startComparison != 0)
-        return startComparison;
-      else
-        return Bytes.compareTo(this.metaEntry.getEndKey(), other.metaEntry.getEndKey());
+    public byte[] getStartKey() {
+      return this.metaEntry.getStartKey();
+    }
+
+    @Override
+    public byte[] getEndKey() {
+      return this.metaEntry.getEndKey();
     }
   }
 
+  final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
+    @Override
+    public int compare(HbckInfo l, HbckInfo r) {
+      if (l == r) {
+        // same instance
+        return 0;
+      }
+
+      int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
+          l.metaEntry.getTableName(), r.metaEntry.getTableName());
+      if (tableCompare != 0) {
+        return tableCompare;
+      }
+
+      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
+          l.metaEntry.getStartKey(), r.metaEntry.getStartKey());
+      if (startComparison != 0) {
+        return startComparison;
+      }
+
+      // Special case for the absolute endkey: map it to null so it sorts last.
+      byte[] endKey = r.metaEntry.getEndKey();
+      endKey = (endKey.length == 0) ? null : endKey;
+      byte[] endKey2 = l.metaEntry.getEndKey();
+      endKey2 = (endKey2.length == 0) ? null : endKey2;
+      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
+          endKey2, endKey);
+
+      if (endComparison != 0) {
+        return endComparison;
+      }
+
+      // use modTime as a tiebreaker.
+      return (int) (l.metaEntry.modTime - r.metaEntry.modTime);
+    }
+  };
+
   /**
    * Prints summary of all tables found on the system.
    */
@@ -1102,7 +1161,7 @@ public class HBaseFsck {
   }
 
   /**
-   * Contact hdfs and get all information about spcified table directory.
+   * Contact hdfs and get all information about the specified table directory.
    */
   static class WorkItemHdfsDir implements Runnable {
     private HBaseFsck hbck;
@@ -1324,4 +1383,3 @@
     Runtime.getRuntime().exit(code);
   }
 }
-
diff --git a/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index f42a87c191c..fae0881cf78 100644
--- a/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -24,16 +24,23 @@ import static org.junit.Assert.assertEquals;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HTableDescriptor;
+
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
@@ -146,6 +153,49 @@ public class TestHBaseFsck {
     return hri;
   }
 
+  public void dumpMeta(HTableDescriptor htd) throws IOException {
+    List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(htd.getName());
+    for (byte[] row : metaRows) {
+      LOG.info(Bytes.toString(row));
+    }
+  }
+
+  private void deleteRegion(Configuration conf, final HTableDescriptor htd,
+      byte[] startKey, byte[] endKey) throws IOException {
+
+    LOG.info("Before delete:");
+    dumpMeta(htd);
+
+    Map<HRegionInfo, HServerAddress> hris = tbl.getRegionsInfo();
+    for (Entry<HRegionInfo, HServerAddress> e: hris.entrySet()) {
+      HRegionInfo hri = e.getKey();
+      HServerAddress hsa = e.getValue();
+      if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
+          && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
+
+        LOG.info("RegionName: " + hri.getRegionNameAsString());
+        byte[] deleteRow = hri.getRegionName();
+        TEST_UTIL.getHBaseAdmin().unassign(deleteRow, true);
+
+        LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
+        Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
+        FileSystem fs = rootDir.getFileSystem(conf);
+        Path p = new Path(rootDir + "/" + htd.getNameAsString(), hri.getEncodedName());
+        fs.delete(p, true);
+
+        HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+        Delete delete = new Delete(deleteRow);
+        meta.delete(delete);
+      }
+      LOG.info(hri.toString() + hsa.toString());
+    }
+
+    TEST_UTIL.getMetaTableRows(htd.getName());
+    LOG.info("After delete:");
+    dumpMeta(htd);
+
+  }
+
   /**
    * Setup a clean table before we start mucking with it.
    *
@@ -213,7 +263,8 @@ public class TestHBaseFsck {
         .waitForAssignment(hriDupe);
 
       assertErrors(doFsck(false),
-          new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS });
+          new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
+              ERROR_CODE.DUPE_STARTKEYS});
     } finally {
       deleteTable(table);
     }
@@ -253,10 +304,14 @@ public class TestHBaseFsck {
 
       // Mess it up by leaving a hole in the meta data
       HRegionInfo hriHole = createRegion(conf, tbl.getTableDescriptor(),
-          Bytes.toBytes("D"), Bytes.toBytes("E"));
+          Bytes.toBytes("D"), Bytes.toBytes(""));
       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriHole);
       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
           .waitForAssignment(hriHole);
+
+      TEST_UTIL.getHBaseAdmin().disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""));
+      TEST_UTIL.getHBaseAdmin().enableTable(table);
       assertErrors(doFsck(false),
           new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
     } finally {
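For readers unfamiliar with RegionSplitCalculator, the sketch below is a minimal, self-contained illustration of the split-point coverage idea the reworked checkRegionChain() relies on; the Range class and the String keys are simplifications invented here and are not part of the patch. Every start and end key becomes a split point, each region covers the split points inside its span, and a split point covered by zero regions indicates a hole, while one covered by several regions indicates duplicate start keys or an overlap.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class SplitCoverageSketch {

  /** Hypothetical stand-in for HbckInfo: a region spanning [start, end). */
  static class Range {
    final String start, end;   // "" as the end means the absolute end key
    Range(String start, String end) { this.start = start; this.end = end; }
    @Override public String toString() { return "[" + start + "," + end + ")"; }
  }

  public static void main(String[] args) {
    // [c,d) is missing (a hole); [d,e) and [d,f) share a start key.
    List<Range> regions = List.of(
        new Range("", "b"), new Range("b", "c"),
        new Range("d", "e"), new Range("d", "f"), new Range("f", ""));

    // 1. Every start and end key becomes a split point.
    TreeMap<String, List<Range>> coverage = new TreeMap<>();
    for (Range r : regions) {
      coverage.putIfAbsent(r.start, new ArrayList<>());
      if (!r.end.isEmpty()) coverage.putIfAbsent(r.end, new ArrayList<>());
    }

    // 2. A region covers every split point in [start, end); an empty end
    //    key covers through the last split point.
    for (Range r : regions) {
      boolean toEnd = r.end.isEmpty();
      coverage.subMap(r.start, true, toEnd ? coverage.lastKey() : r.end, toEnd)
          .values().forEach(slot -> slot.add(r));
    }

    // 3. Walk the split points: an uncovered point that has a successor is
    //    a hole; more than one covering region is a dup startkey or overlap.
    for (Map.Entry<String, List<Range>> e : coverage.entrySet()) {
      String next = coverage.higherKey(e.getKey());
      if (e.getValue().isEmpty() && next != null) {
        System.out.println("hole between " + e.getKey() + " and " + next);
      } else if (e.getValue().size() > 1) {
        System.out.println("multiple regions at " + e.getKey() + ": "
            + e.getValue());
      }
    }
  }
}

The patched checkRegionChain() performs the same walk using sc.getSplits() and sc.calcCoverage(), and further splits the multiple-coverage case into DUPE_STARTKEYS (equal start keys) and OVERLAP_IN_REGION_CHAIN (unequal start keys).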