HBASE-5719 Enhance hbck to sideline overlapped mega regions (Jimmy Xiang)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1325404 13f79535-47bb-0310-9956-ffa450edef68
Jonathan Hsieh 2012-04-12 17:44:52 +00:00
parent 38eb4579e4
commit 36324e341d
3 changed files with 243 additions and 19 deletions

src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java

@@ -22,8 +22,10 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.SortedSet;
@@ -77,7 +79,6 @@ import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
 import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
-import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
@@ -140,6 +141,7 @@ public class HBaseFsck {
   private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
   private static final long THREADS_KEEP_ALIVE_SECONDS = 60;
   private static boolean rsSupportsOffline = true;
+  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
   private static final int DEFAULT_MAX_MERGE = 5;
   /**********************
@@ -152,7 +154,6 @@ public class HBaseFsck {
   private HBaseAdmin admin;
   private HTable meta;
   private ThreadPoolExecutor executor; // threads to retrieve data from regionservers
-  private int numThreads = MAX_NUM_THREADS;
   private long startMillis = System.currentTimeMillis();
   /***********
@@ -170,6 +171,8 @@ public class HBaseFsck {
   // limit fixes to listed tables, if empty atttempt to fix all
   private List<byte[]> tablesToFix = new ArrayList<byte[]>();
   private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
+  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
+  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
   private boolean rerun = false; // if we tried to fix something, rerun hbck
   private static boolean summary = false; // if we want to print less output
@@ -215,7 +218,7 @@ public class HBaseFsck {
       ZooKeeperConnectionException, IOException {
     this.conf = conf;
-    int numThreads = conf.getInt("hbasefsck.numthreads", Integer.MAX_VALUE);
+    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
     executor = new ThreadPoolExecutor(1, numThreads,
       THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
       new SynchronousQueue<Runnable>());
@@ -834,14 +837,14 @@ public class HBaseFsck {
   /**
    * Sideline a region dir (instead of deleting it)
    */
-  void sidelineRegionDir(FileSystem fs, HbckInfo hi)
+  Path sidelineRegionDir(FileSystem fs, HbckInfo hi)
     throws IOException {
     String tableName = Bytes.toString(hi.getTableName());
     Path regionDir = hi.getHdfsRegionDir();
     if (!fs.exists(regionDir)) {
       LOG.warn("No previous " + regionDir + " exists. Continuing.");
-      return;
+      return null;
     }
     Path sidelineTableDir= new Path(getSidelineDir(), tableName);
@@ -874,12 +877,15 @@ public class HBaseFsck {
       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
       // it moves the src into the dst dir resulting in (foo/a/b). If
       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
-      for (FileStatus hfile : fs.listStatus(src)) {
-        success = fs.rename(hfile.getPath(), dst);
-        if (!success) {
-          String msg = "Unable to rename file " + src + " to " + dst;
-          LOG.error(msg);
-          throw new IOException(msg);
+      FileStatus[] hfiles = fs.listStatus(src);
+      if (hfiles != null && hfiles.length > 0) {
+        for (FileStatus hfile : hfiles) {
+          success = fs.rename(hfile.getPath(), dst);
+          if (!success) {
+            String msg = "Unable to rename file " + src + " to " + dst;
+            LOG.error(msg);
+            throw new IOException(msg);
+          }
         }
       }
       LOG.debug("Sideline directory contents:");
@@ -894,6 +900,7 @@ public class HBaseFsck {
       LOG.error(msg);
       throw new IOException(msg);
     }
+    return sidelineRegionDir;
   }
   /**
@@ -1552,6 +1559,9 @@ public class HBaseFsck {
     // backwards regions
     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
+    // sidelined big overlapped regions
+    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
     // region split calculator
     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
@@ -1573,6 +1583,9 @@ public class HBaseFsck {
     private HTableDescriptor getHTD() {
       if (htds.size() == 1) {
         return (HTableDescriptor)htds.toArray()[0];
+      } else {
+        LOG.error("None/Multiple table descriptors found for table '"
+          + tableName + "' regions: " + htds);
       }
       return null;
     }
@@ -1753,13 +1766,21 @@ public class HBaseFsck {
       if (overlap.size() > maxMerge) {
         LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
-          "regions which is greater than " + maxMerge + ", the max " +
-          "number of regions to merge.");
+          "regions which is greater than " + maxMerge + ", the max number of regions to merge");
+        if (sidelineBigOverlaps) {
+          // we only sideline big overlapped groups that exceeds the max number of regions to merge
+          sidelineBigOverlaps(overlap);
+        }
         return;
       }
+      mergeOverlaps(overlap);
+    }
+
+    void mergeOverlaps(Collection<HbckInfo> overlap)
+        throws IOException {
       LOG.info("== Merging regions into one region: "
         + Joiner.on(",").join(overlap));
       // get the min / max range and close all concerned regions
       Pair<byte[], byte[]> range = null;
       for (HbckInfo hi : overlap) {
@@ -1823,7 +1844,48 @@ public class HBaseFsck {
         fixes++;
       }
     }
-  };
+
+    /**
+     * Sideline some regions in a big overlap group so that it
+     * will have fewer regions, and it is easier to merge them later on.
+     *
+     * @param bigOverlap the overlapped group with regions more than maxMerge
+     * @throws IOException
+     */
+    void sidelineBigOverlaps(
+        Collection<HbckInfo> bigOverlap) throws IOException {
+      int overlapsToSideline = bigOverlap.size() - maxMerge;
+      if (overlapsToSideline > maxOverlapsToSideline) {
+        overlapsToSideline = maxOverlapsToSideline;
+      }
+      List<HbckInfo> regionsToSideline =
+        RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
+      FileSystem fs = FileSystem.get(conf);
+      for (HbckInfo regionToSideline: regionsToSideline) {
+        try {
+          LOG.info("Closing region: " + regionToSideline);
+          closeRegion(regionToSideline);
+        } catch (InterruptedException ie) {
+          LOG.warn("Was unable to close region " + regionToSideline.getRegionNameAsString()
+            + ". Interrupted.");
+          throw new IOException(ie);
+        }
+        LOG.info("Offlining region: " + regionToSideline);
+        offline(regionToSideline.getRegionName());
+        LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
+        Path sidelineRegionDir = sidelineRegionDir(fs, regionToSideline);
+        if (sidelineRegionDir != null) {
+          sidelinedRegions.put(sidelineRegionDir, regionToSideline);
+          LOG.info("After sidelined big overlapped region: "
+            + regionToSideline.getRegionNameAsString()
+            + " to " + sidelineRegionDir.toString());
+          fixes++;
+        }
+      }
+    }
+  }

   /**
    * Check the region chain (from META) of this table. We are looking for
@@ -1909,16 +1971,22 @@ public class HBaseFsck {
       if (details) {
         // do full region split map dump
         System.out.println("---- Table '" + this.tableName
          + "': region split map");
         dump(splits, regions);
         System.out.println("---- Table '" + this.tableName
          + "': overlap groups");
         dumpOverlapProblems(overlapGroups);
         System.out.println("There are " + overlapGroups.keySet().size()
          + " overlap groups with " + overlapGroups.size()
          + " overlapping regions");
       }
+      if (!sidelinedRegions.isEmpty()) {
+        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
+        System.out.println("---- Table '" + this.tableName
+          + "': sidelined big overlapped regions");
+        dumpSidelinedRegions(sidelinedRegions);
+      }
       return errors.getErrorList().size() == originalErrorsCount;
     }
@@ -1954,6 +2022,13 @@ public class HBaseFsck {
     }
   }
+  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
+    for (Path k : regions.keySet()) {
+      System.out.println("To be bulk loaded sidelined region dir: "
+        + k.toString());
+    }
+  }
   public Multimap<byte[], HbckInfo> getOverlapGroups(
       String table) {
     TableInfo ti = tablesInfo.get(table);
@@ -2760,6 +2835,14 @@ public class HBaseFsck {
   public boolean shouldFixVersionFile() {
     return fixVersionFile;
   }
+  public void setSidelineBigOverlaps(boolean sbo) {
+    this.sidelineBigOverlaps = sbo;
+  }
+  public boolean shouldSidelineBigOverlaps() {
+    return sidelineBigOverlaps;
+  }
   /**
    * @param mm maximum number of regions to merge into a single region.
@@ -2772,6 +2855,14 @@ public class HBaseFsck {
     return maxMerge;
   }
+  public void setMaxOverlapsToSideline(int mo) {
+    this.maxOverlapsToSideline = mo;
+  }
+  public int getMaxOverlapsToSideline() {
+    return maxOverlapsToSideline;
+  }
   /**
    * Only fix tables specified by the list
    */
@@ -2821,9 +2912,11 @@ public class HBaseFsck {
     System.err.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
     System.err.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
     System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
+    System.err.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
+    System.err.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
     System.err.println("");
     System.err.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
-      "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile");
+      "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
     System.err.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");
     Runtime.getRuntime().exit(-2);
@@ -2888,6 +2981,8 @@ public class HBaseFsck {
         fsck.setFixHdfsOverlaps(true);
       } else if (cmd.equals("-fixVersionFile")) {
         fsck.setFixVersionFile(true);
+      } else if (cmd.equals("-sidelineBigOverlaps")) {
+        fsck.setSidelineBigOverlaps(true);
       } else if (cmd.equals("-repair")) {
         // this attempts to merge overlapping hdfs regions, needs testing
         // under load
@@ -2897,6 +2992,7 @@ public class HBaseFsck {
         fsck.setFixAssignments(true);
         fsck.setFixHdfsOverlaps(true);
         fsck.setFixVersionFile(true);
+        fsck.setSidelineBigOverlaps(true);
       } else if (cmd.equals("-repairHoles")) {
         // this will make all missing hdfs regions available but may lose data
         fsck.setFixHdfsHoles(true);
@@ -2904,6 +3000,20 @@ public class HBaseFsck {
         fsck.setFixMeta(true);
         fsck.setFixAssignments(true);
         fsck.setFixHdfsOverlaps(false);
+        fsck.setSidelineBigOverlaps(false);
+      } else if (cmd.equals("-maxOverlapsToSideline")) {
+        if (i == args.length - 1) {
+          System.err.println("-maxOverlapsToSideline needs a numeric value argument.");
+          printUsageAndExit();
+        }
+        try {
+          int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
+          fsck.setMaxOverlapsToSideline(maxOverlapsToSideline);
+        } catch (NumberFormatException e) {
+          System.err.println("-maxOverlapsToSideline needs a numeric value argument.");
+          printUsageAndExit();
+        }
+        i++;
       } else if (cmd.equals("-maxMerge")) {
         if (i == args.length - 1) {
           System.err.println("-maxMerge needs a numeric value argument.");
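
The patch therefore adds two knobs to HBaseFsck: sidelineBigOverlaps turns the behavior on, and maxOverlapsToSideline caps how many regions are sidelined per overlap group. The following is a minimal sketch of a hypothetical driver, not part of the patch: the HBaseFsck(Configuration) constructor and the setFixHdfsOverlaps/setSidelineBigOverlaps/setMaxOverlapsToSideline setters come from the diff above, setMaxMerge is assumed to exist from the pre-existing -maxMerge option, and the call that actually runs the check/repair pass varies by release, so it is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.HBaseFsck;

public class SidelineBigOverlapsDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseFsck fsck = new HBaseFsck(conf);

    fsck.setFixHdfsOverlaps(true);       // merge overlapping regions in HDFS
    fsck.setMaxMerge(5);                 // merge at most 5 regions per group (assumed setter behind -maxMerge)
    fsck.setSidelineBigOverlaps(true);   // new: sideline members of groups larger than maxMerge
    fsck.setMaxOverlapsToSideline(2);    // new: sideline at most 2 regions per group

    // Command-line equivalent added by this patch:
    //   -fixHdfsOverlaps -sidelineBigOverlaps -maxOverlapsToSideline 2
    // Running the actual check/repair pass is release-specific and omitted here.
  }
}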

src/main/java/org/apache/hadoop/hbase/util/RegionSplitCalculator.java

@@ -19,9 +19,12 @@
  */
 package org.apache.hadoop.hbase.util;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
+import java.util.List;
 import java.util.Map.Entry;
+import java.util.TreeMap;
 import java.util.TreeSet;
 import org.apache.commons.logging.Log;
@@ -96,7 +99,7 @@ public class RegionSplitCalculator<R extends KeyRange> {
    *
    * @return ENDKEY if end key is empty, else normal endkey.
    */
-  private byte[] specialEndKey(R range) {
+  private static <R extends KeyRange> byte[] specialEndKey(R range) {
     byte[] end = range.getEndKey();
     if (end.length == 0) {
       return ENDKEY;
@@ -161,4 +164,75 @@ public class RegionSplitCalculator<R extends KeyRange> {
     return starts;
   }
+  /**
+   * Find specified number of top ranges in a big overlap group.
+   * It could return less if there are not that many top ranges.
+   * Once these top ranges are excluded, the big overlap group will
+   * be broken into ranges with no overlapping, or smaller overlapped
+   * groups, and most likely some holes.
+   *
+   * @param bigOverlap a list of ranges that overlap with each other
+   * @param count the max number of ranges to find
+   * @return a list of ranges that overlap with most others
+   */
+  public static <R extends KeyRange> List<R>
+      findBigRanges(Collection<R> bigOverlap, int count) {
+    List<R> bigRanges = new ArrayList<R>();
+    // The key is the count of overlaps,
+    // The value is a list of ranges that have that many overlaps
+    TreeMap<Integer, List<R>> overlapRangeMap = new TreeMap<Integer, List<R>>();
+    for (R r: bigOverlap) {
+      // Calculates the # of overlaps for each region
+      // and populates rangeOverlapMap
+      byte[] startKey = r.getStartKey();
+      byte[] endKey = specialEndKey(r);
+      int overlappedRegions = 0;
+      for (R rr: bigOverlap) {
+        byte[] start = rr.getStartKey();
+        byte[] end = specialEndKey(rr);
+        if (BYTES_COMPARATOR.compare(startKey, end) < 0
+            && BYTES_COMPARATOR.compare(endKey, start) > 0) {
+          overlappedRegions++;
+        }
+      }
+      // One region always overlaps with itself,
+      // so overlappedRegions should be more than 1
+      // for actual overlaps.
+      if (overlappedRegions > 1) {
+        Integer key = Integer.valueOf(overlappedRegions);
+        List<R> ranges = overlapRangeMap.get(key);
+        if (ranges == null) {
+          ranges = new ArrayList<R>();
+          overlapRangeMap.put(key, ranges);
+        }
+        ranges.add(r);
+      }
+    }
+    int toBeAdded = count;
+    for (Integer key: overlapRangeMap.descendingKeySet()) {
+      List<R> chunk = overlapRangeMap.get(key);
+      int chunkSize = chunk.size();
+      if (chunkSize <= toBeAdded) {
+        bigRanges.addAll(chunk);
+        toBeAdded -= chunkSize;
+        if (toBeAdded > 0) continue;
+      } else {
+        // Try to use the middle chunk in case the overlapping is
+        // chained, for example: [a, c), [b, e), [d, g), [f h)...
+        // In such a case, sideline the middle chunk will break
+        // the group efficiently.
+        int start = (chunkSize - toBeAdded)/2;
+        int end = start + toBeAdded;
+        for (int i = start; i < end; i++) {
+          bigRanges.add(chunk.get(i));
+        }
+      }
+      break;
+    }
+    return bigRanges;
+  }
 }
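
findBigRanges ranks each range by how many members of the group it overlaps (a range always counts itself, so real overlaps start at 2) and returns the widest overlappers first, preferring the middle of a tie so that chained overlaps are broken efficiently. Below is a minimal standalone sketch of that behavior; SimpleKeyRange is a hypothetical stand-in for hbck's HbckInfo (and for the test-only SimpleRange used in the new unit test further down), and it assumes the KeyRange interface in this package exposes only getStartKey()/getEndKey(), as the calculator uses them.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.KeyRange;
import org.apache.hadoop.hbase.util.RegionSplitCalculator;

public class FindBigRangesDemo {
  // Hypothetical helper for illustration only.
  static class SimpleKeyRange implements KeyRange {
    private final byte[] start;
    private final byte[] end;
    SimpleKeyRange(String start, String end) {
      this.start = Bytes.toBytes(start);
      this.end = Bytes.toBytes(end);
    }
    @Override public byte[] getStartKey() { return start; }
    @Override public byte[] getEndKey() { return end; }
  }

  public static void main(String[] args) {
    // [A,I) overlaps all three other ranges; [A,E) and [B,C) overlap two each; [E,H) overlaps one.
    List<SimpleKeyRange> group = Arrays.asList(
        new SimpleKeyRange("A", "I"),
        new SimpleKeyRange("A", "E"),
        new SimpleKeyRange("B", "C"),
        new SimpleKeyRange("E", "H"));
    // Asking for one range returns the widest overlapper, [A,I); sidelining it leaves
    // a smaller overlap group ([A,E) and [B,C)) plus a standalone [E,H).
    List<SimpleKeyRange> toSideline = RegionSplitCalculator.findBigRanges(group, 1);
    System.out.println(Bytes.toString(toSideline.get(0).getStartKey()) + " -> "
        + Bytes.toString(toSideline.get(0).getEndKey()));  // prints "A -> I"
  }
}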

src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitCalculator.java

@@ -20,9 +20,12 @@
 package org.apache.hadoop.hbase.util;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
+import java.util.List;
 import java.util.SortedSet;
 import org.apache.commons.logging.Log;
@@ -32,6 +35,7 @@ import org.junit.Test;
 import com.google.common.collect.ComparisonChain;
 import com.google.common.collect.Multimap;
+import org.junit.experimental.categories.Category;
 @Category(SmallTests.class)
@@ -351,6 +355,42 @@ public class TestRegionSplitCalculator {
         + "null:\t\n");
   }
+  @Test
+  public void testBigRanges() {
+    SimpleRange ai = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("I"));
+    SimpleRange ae = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("E"));
+    SimpleRange ac = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C"));
+    Collection<SimpleRange> bigOverlap = new ArrayList<SimpleRange>();
+    bigOverlap.add(new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("E")));
+    bigOverlap.add(new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C")));
+    bigOverlap.add(new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B")));
+    bigOverlap.add(new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("C")));
+    bigOverlap.add(new SimpleRange(Bytes.toBytes("E"), Bytes.toBytes("H")));
+    bigOverlap.add(ai);
+    bigOverlap.add(ae);
+    bigOverlap.add(ac);
+    // Expect 1 range to be returned: ai
+    List<SimpleRange> bigRanges = RegionSplitCalculator.findBigRanges(bigOverlap, 1);
+    assertEquals(1, bigRanges.size());
+    assertEquals(ai, bigRanges.get(0));
+    // Expect 3 ranges to be returned: ai, ae and ac
+    bigRanges = RegionSplitCalculator.findBigRanges(bigOverlap, 3);
+    assertEquals(3, bigRanges.size());
+    assertEquals(ai, bigRanges.get(0));
+    SimpleRange r1 = bigRanges.get(1);
+    SimpleRange r2 = bigRanges.get(2);
+    assertEquals(Bytes.toString(r1.start), "A");
+    assertEquals(Bytes.toString(r2.start), "A");
+    String r1e = Bytes.toString(r1.end);
+    String r2e = Bytes.toString(r2.end);
+    assertTrue((r1e.equals("C") && r2e.equals("E"))
+      || (r1e.equals("E") && r2e.equals("C")));
+  }
   @org.junit.Rule
   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();