HBASE-2819 hbck should have the ability to repair basic problems

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1031694 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-11-05 18:20:43 +00:00
parent 92e0f47b8b
commit b09838d4f4
13 changed files with 963 additions and 336 deletions

View File

@ -1102,7 +1102,7 @@ Release 0.21.0 - Unreleased
a minute
HBASE-3189 Stagger Major Compactions (Nicolas Spiegelberg via Stack)
HBASE-2564 [rest] Tests use deprecated foundation
HBASE-2819 hbck should have the ability to repair basic problems
NEW FEATURES
HBASE-1961 HBase EC2 scripts

View File

@ -325,9 +325,10 @@ public class MetaReader {
*/
public static Pair<HRegionInfo, HServerInfo> metaRowToRegionPairWithInfo(
Result data) throws IOException {
HRegionInfo info = Writables.getHRegionInfo(
data.getValue(HConstants.CATALOG_FAMILY,
HConstants.REGIONINFO_QUALIFIER));
byte [] bytes = data.getValue(HConstants.CATALOG_FAMILY,
HConstants.REGIONINFO_QUALIFIER);
if (bytes == null) return null;
HRegionInfo info = Writables.getHRegionInfo(bytes);
final byte[] value = data.getValue(HConstants.CATALOG_FAMILY,
HConstants.SERVER_QUALIFIER);
if (value != null && value.length > 0) {
@ -529,4 +530,4 @@ public class MetaReader {
*/
public boolean visit(final Result r) throws IOException;
}
}
}

View File

@ -737,11 +737,21 @@ public class HBaseAdmin implements Abortable {
HServerAddress hsa = new HServerAddress(hostAndPort);
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(ct, regionname);
closeRegion(hsa, pair.getFirst());
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(regionname) + "; pair=" + pair);
} else {
closeRegion(hsa, pair.getFirst());
}
} else {
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(ct, regionname);
closeRegion(pair.getSecond(), pair.getFirst());
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(regionname) + "; pair=" + pair);
} else {
closeRegion(pair.getSecond(), pair.getFirst());
}
}
} finally {
cleanupCatalogTracker(ct);
@ -783,12 +793,18 @@ public class HBaseAdmin implements Abortable {
if (isRegionName) {
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
flush(pair.getSecond(), pair.getFirst());
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
} else {
flush(pair.getSecond(), pair.getFirst());
}
} else {
List<Pair<HRegionInfo, HServerAddress>> pairs =
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
Bytes.toString(tableNameOrRegionName));
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
if (pair.getSecond() == null) continue;
flush(pair.getSecond(), pair.getFirst());
}
}
@ -871,12 +887,18 @@ public class HBaseAdmin implements Abortable {
if (isRegionName(tableNameOrRegionName)) {
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(ct, tableNameOrRegionName);
compact(pair.getSecond(), pair.getFirst(), major);
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
} else {
compact(pair.getSecond(), pair.getFirst(), major);
}
} else {
List<Pair<HRegionInfo, HServerAddress>> pairs =
MetaReader.getTableRegionsAndLocations(ct,
Bytes.toString(tableNameOrRegionName));
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
if (pair.getSecond() == null) continue;
compact(pair.getSecond(), pair.getFirst(), major);
}
}
@ -956,12 +978,19 @@ public class HBaseAdmin implements Abortable {
// Its a possible region name.
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
split(pair.getSecond(), pair.getFirst());
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
} else {
split(pair.getSecond(), pair.getFirst());
}
} else {
List<Pair<HRegionInfo, HServerAddress>> pairs =
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
Bytes.toString(tableNameOrRegionName));
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
// May not be a server for a particular row
if (pair.getSecond() == null) continue;
split(pair.getSecond(), pair.getFirst());
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,121 @@
/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
/**
 * Static helpers that repair basic region-assignment problems.
 * <p>
 * Each repair works on a private copy of the supplied {@link HRegionInfo}
 * and proceeds by clearing state held in the master and in ZooKeeper, and
 * then removing the server assignment columns from the catalog table row
 * of the region.
 */
public class HBaseFsckRepair {

  /**
   * Repairs a region that is deployed on more than one region server.
   * <p>
   * Clears the region's state in the master and in ZooKeeper, asks every
   * listed server to close the region (without ZooKeeper transitions), and
   * then handles the region as unassigned via
   * {@link #fixUnassigned(Configuration, HRegionInfo)}.
   *
   * @param conf configuration used to obtain the cluster connection
   * @param region region that is multiply assigned
   * @param servers addresses of all servers the region should be closed on
   * @throws IOException if a close request, ZooKeeper cleanup or catalog
   * update fails
   */
  public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
      List<HServerAddress> servers)
  throws IOException {
    HRegionInfo actualRegion = new HRegionInfo(region);

    // Clear status in master and zk
    clearInMaster(conf, actualRegion);
    clearInZK(conf, actualRegion);

    // Close region on the servers
    for (HServerAddress server : servers) {
      closeRegion(conf, server, actualRegion);
    }

    // It's unassigned so fix it as such
    fixUnassigned(conf, actualRegion);
  }

  /**
   * Repairs a region that is not assigned anywhere.
   * <p>
   * Clears the region's state in the master and in ZooKeeper and deletes
   * the server and startcode columns from its catalog row.
   *
   * @param conf configuration used to obtain the cluster connection
   * @param region region to repair
   * @throws IOException if ZooKeeper cleanup or the catalog update fails
   */
  public static void fixUnassigned(Configuration conf, HRegionInfo region)
  throws IOException {
    HRegionInfo actualRegion = new HRegionInfo(region);

    // Clear status in master and zk
    clearInMaster(conf, actualRegion);
    clearInZK(conf, actualRegion);

    // Clear assignment in META or ROOT
    clearAssignment(conf, actualRegion);
  }

  /**
   * Contacts the master and prints its protocol version. Nothing is actually
   * cleared in the master yet; see the TODO in the body.
   *
   * @param conf configuration used to obtain the cluster connection
   * @param region region being repaired (only printed)
   * @throws IOException if the master cannot be reached
   */
  private static void clearInMaster(Configuration conf, HRegionInfo region)
  throws IOException {
    System.out.println("Region being cleared in master: " + region);
    HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
    // NOTE(review): the client version 25 is hard-coded here while the same
    // change sets HBaseRPCProtocolVersion.versionID to 26 -- confirm whether
    // this value should track that constant.
    long masterVersion =
      master.getProtocolVersion(HMasterInterface.class.getName(), 25);
    System.out.println("Master protocol version: " + masterVersion);
    // TODO: Do we want to do it this way?
    // Better way is to tell master to fix the issue itself?
    // That way it can use in-memory state to determine best plan
    //   master.clearFromTransition(region);
  }

  /**
   * Deletes the region's assignment node in ZooKeeper via
   * {@link ZKAssign#deleteNodeFailSilent(ZooKeeperWatcher, HRegionInfo)}.
   *
   * @param conf configuration used to obtain the cluster connection
   * @param region region whose node should be removed
   * @throws IOException wrapping any {@link KeeperException} from ZooKeeper
   */
  private static void clearInZK(Configuration conf, HRegionInfo region)
  throws IOException {
    ZooKeeperWatcher zkw =
      HConnectionManager.getConnection(conf).getZooKeeperWatcher();
    try {
      ZKAssign.deleteNodeFailSilent(zkw, region);
    } catch (KeeperException e) {
      throw new IOException("Unexpected ZK exception", e);
    }
  }

  /**
   * Asks a single region server to close the region, passing
   * <code>false</code> for the ZooKeeper flag.
   *
   * @param conf configuration used to obtain the cluster connection
   * @param server address of the region server to contact
   * @param region region to close
   * @throws IOException if the close request fails
   */
  private static void closeRegion(Configuration conf, HServerAddress server,
      HRegionInfo region)
  throws IOException {
    HRegionInterface rs =
      HConnectionManager.getConnection(conf).getHRegionConnection(server);
    rs.closeRegion(region, false);
  }

  /**
   * Deletes the server and startcode columns of the region's row in its
   * catalog table: ROOT when {@link HRegionInfo#isMetaTable()} is true,
   * META otherwise.
   *
   * @param conf configuration used to open the catalog table
   * @param region region whose assignment columns should be removed
   * @throws IOException if the catalog table cannot be opened, updated or
   * closed
   */
  private static void clearAssignment(Configuration conf,
      HRegionInfo region)
  throws IOException {
    byte [] catalogTableName = region.isMetaTable()?
      HConstants.ROOT_TABLE_NAME: HConstants.META_TABLE_NAME;
    HTable ht = new HTable(conf, catalogTableName);
    try {
      Delete del = new Delete(region.getRegionName());
      del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
      del.deleteColumns(HConstants.CATALOG_FAMILY,
          HConstants.STARTCODE_QUALIFIER);
      ht.delete(del);
    } finally {
      // Always release the table so repeated repairs do not leak it.
      ht.close();
    }
  }
}

View File

@ -63,13 +63,14 @@ public class MetaScanner {
*
* @param configuration config
* @param visitor visitor object
* @param tableName table name
* @param userTableName User table name in meta table to start scan at. Pass
* null if not interested in a particular table.
* @throws IOException e
*/
public static void metaScan(Configuration configuration,
MetaScannerVisitor visitor, byte[] tableName)
MetaScannerVisitor visitor, byte [] userTableName)
throws IOException {
metaScan(configuration, visitor, tableName, null, Integer.MAX_VALUE);
metaScan(configuration, visitor, userTableName, null, Integer.MAX_VALUE);
}
/**
@ -79,7 +80,8 @@ public class MetaScanner {
*
* @param configuration HBase configuration.
* @param visitor Visitor object.
* @param tableName User table name.
* @param userTableName User table name in meta table to start scan at. Pass
* null if not interested in a particular table.
* @param row Name of the row at the user table. The scan will start from
* the region row where the row resides.
* @param rowLimit Max of processed rows. If it is less than 0, it
@ -87,8 +89,32 @@ public class MetaScanner {
* @throws IOException e
*/
public static void metaScan(Configuration configuration,
MetaScannerVisitor visitor, byte[] tableName, byte[] row,
MetaScannerVisitor visitor, byte [] userTableName, byte[] row,
int rowLimit)
throws IOException {
metaScan(configuration, visitor, userTableName, row, rowLimit,
HConstants.META_TABLE_NAME);
}
/**
* Scans the meta table and calls a visitor on each RowResult. Uses a table
* name and a row name to locate meta regions. And it only scans at most
* <code>rowLimit</code> of rows.
*
* @param configuration HBase configuration.
* @param visitor Visitor object.
* @param tableName User table name in meta table to start scan at. Pass
* null if not interested in a particular table.
* @param row Name of the row at the user table. The scan will start from
* the region row where the row resides.
* @param rowLimit Max of processed rows. If it is less than 0, it
* will be set to default value <code>Integer.MAX_VALUE</code>.
* @param metaTableName Meta table to scan, root or meta.
* @throws IOException e
*/
public static void metaScan(Configuration configuration,
MetaScannerVisitor visitor, byte [] tableName, byte[] row,
int rowLimit, final byte [] metaTableName)
throws IOException {
int rowUpperLimit = rowLimit > 0 ? rowLimit: Integer.MAX_VALUE;
@ -136,8 +162,6 @@ public class MetaScanner {
configuration.getInt("hbase.meta.scanner.caching", 100));
do {
final Scan scan = new Scan(startRow).addFamily(HConstants.CATALOG_FAMILY);
byte [] metaTableName = Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)?
HConstants.ROOT_TABLE_NAME: HConstants.META_TABLE_NAME;
LOG.debug("Scanning " + Bytes.toString(metaTableName) +
" starting at row=" + Bytes.toString(startRow) + " for max=" +
rowUpperLimit + " rows");

View File

@ -80,5 +80,5 @@ public interface HBaseRPCProtocolVersion extends VersionedProtocol {
* <li>Version 26: New master and Increment, 0.90 version bump.</li>
* </ul>
*/
public static final long versionID = 25L; // Setting it to 25 temporarily to see if hudson passes. #1608 hudson failed because of version mismatch 25 vs 26.
public static final long versionID = 26L;
}

View File

@ -275,7 +275,7 @@ public interface HRegionInterface extends HBaseRPCProtocolVersion, Stoppable, Ab
* @return All regions online on this region server
* @throws IOException e
*/
public NavigableSet<HRegionInfo> getOnlineRegions();
public List<HRegionInfo> getOnlineRegions();
/**
* Method used when a master is taking the place of another failed one.
@ -334,6 +334,17 @@ public interface HRegionInterface extends HBaseRPCProtocolVersion, Stoppable, Ab
public boolean closeRegion(final HRegionInfo region)
throws IOException;
/**
* Closes the specified region, performing or skipping the ZooKeeper
* transitions during the close according to the specified flag.
* @param region region to close
* @param zk true if transitions should be done in ZK, false if not
* @return true if closing region, false if not
* @throws IOException e
*/
public boolean closeRegion(final HRegionInfo region, final boolean zk)
throws IOException;
// Region administrative methods
/**

View File

@ -1199,6 +1199,7 @@ public class AssignmentManager extends ZooKeeperListener {
for (Result result : results) {
Pair<HRegionInfo,HServerInfo> region =
MetaReader.metaRowToRegionPairWithInfo(result);
if (region == null) continue;
HServerInfo regionLocation = region.getSecond();
HRegionInfo regionInfo = region.getFirst();
if (regionLocation == null) {
@ -1325,6 +1326,34 @@ public class AssignmentManager extends ZooKeeperListener {
}
}
/**
 * Removes the given region from this manager's in-memory bookkeeping:
 * the regions-in-transition map, the regions map, the region plans and
 * the region list of each server. Each structure is updated while holding
 * its own monitor.
 * <p>
 * Used only by HBCK tool.
 * @param hri region to remove
 */
public void clearRegionFromTransition(HRegionInfo hri) {
  synchronized (this.regionsInTransition) {
    this.regionsInTransition.remove(hri.getEncodedName());
  }
  synchronized (this.regions) {
    this.regions.remove(hri);
  }
  synchronized (this.regionPlans) {
    this.regionPlans.remove(hri.getEncodedName());
  }
  synchronized (this.servers) {
    for (List<HRegionInfo> hosted : this.servers.values()) {
      // Find the first entry equal to hri, if any, and drop only that one.
      int idx = 0;
      while (idx < hosted.size() && !hosted.get(idx).equals(hri)) {
        idx++;
      }
      if (idx < hosted.size()) {
        hosted.remove(idx);
      }
    }
  }
}
/**
* Checks if the specified table has been disabled by the user.
* @param tableName

View File

@ -862,6 +862,11 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
}
}
/**
 * Clears the given region from the assignment manager, but only when the
 * assignment manager reports a non-null in-transition entry for it.
 * @param hri region to clear
 */
public void clearFromTransition(HRegionInfo hri) {
  if (this.assignmentManager.isRegionInTransition(hri) == null) {
    // Not in transition: nothing to clear.
    return;
  }
  this.assignmentManager.clearRegionFromTransition(hri);
}
/**
* @return cluster status
*/

View File

@ -345,64 +345,67 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
@Override
public Integer apply(Writable from) {
if (from instanceof HBaseRPC.Invocation) {
HBaseRPC.Invocation inv = (HBaseRPC.Invocation) from;
if (!(from instanceof HBaseRPC.Invocation)) return NORMAL_QOS;
String methodName = inv.getMethodName();
HBaseRPC.Invocation inv = (HBaseRPC.Invocation) from;
String methodName = inv.getMethodName();
// scanner methods...
if (methodName.equals("next") || methodName.equals("close")) {
// translate!
Long scannerId;
try {
scannerId = (Long) inv.getParameters()[0];
} catch (ClassCastException ignored) {
//LOG.debug("Low priority: " + from);
return NORMAL_QOS; // doh.
}
String scannerIdString = Long.toString(scannerId);
InternalScanner scanner = scanners.get(scannerIdString);
if (scanner instanceof HRegion.RegionScanner) {
HRegion.RegionScanner rs = (HRegion.RegionScanner) scanner;
HRegionInfo regionName = rs.getRegionName();
if (regionName.isMetaRegion()) {
//LOG.debug("High priority scanner request: " + scannerId);
return HIGH_QOS;
}
}
// scanner methods...
if (methodName.equals("next") || methodName.equals("close")) {
// translate!
Long scannerId;
try {
scannerId = (Long) inv.getParameters()[0];
} catch (ClassCastException ignored) {
// LOG.debug("Low priority: " + from);
return NORMAL_QOS; // doh.
}
else if (methodName.equals("getHServerInfo") ||
methodName.equals("getRegionsAssignment") ||
methodName.equals("unlockRow") ||
methodName.equals("getProtocolVersion") ||
methodName.equals("getClosestRowBefore")) {
//LOG.debug("High priority method: " + methodName);
return HIGH_QOS;
}
else if (inv.getParameterClasses()[0] == byte[].class) {
// first arg is byte array, so assume this is a regionname:
if (isMetaRegion((byte[]) inv.getParameters()[0])) {
//LOG.debug("High priority with method: " + methodName + " and region: "
// + Bytes.toString((byte[]) inv.getParameters()[0]));
String scannerIdString = Long.toString(scannerId);
InternalScanner scanner = scanners.get(scannerIdString);
if (scanner instanceof HRegion.RegionScanner) {
HRegion.RegionScanner rs = (HRegion.RegionScanner) scanner;
HRegionInfo regionName = rs.getRegionName();
if (regionName.isMetaRegion()) {
// LOG.debug("High priority scanner request: " + scannerId);
return HIGH_QOS;
}
}
else if (inv.getParameterClasses()[0] == MultiAction.class) {
MultiAction ma = (MultiAction) inv.getParameters()[0];
Set<byte[]> regions = ma.getRegions();
// ok this sucks, but if any single of the actions touches a meta, the whole
// thing gets pingged high priority. This is a dangerous hack because people
// can get their multi action tagged high QOS by tossing a Get(.META.) AND this
// regionserver hosts META/-ROOT-
for (byte[] region: regions) {
if (isMetaRegion(region)) {
//LOG.debug("High priority multi with region: " + Bytes.toString(region));
return HIGH_QOS; // short circuit for the win.
}
} else if (methodName.equals("getHServerInfo")
|| methodName.equals("getRegionsAssignment")
|| methodName.equals("unlockRow")
|| methodName.equals("getProtocolVersion")
|| methodName.equals("getClosestRowBefore")) {
// LOG.debug("High priority method: " + methodName);
return HIGH_QOS;
} else if (inv.getParameterClasses().length == 0) {
// Just let it through. This is getOnlineRegions, etc.
} else if (inv.getParameterClasses()[0] == byte[].class) {
// first arg is byte array, so assume this is a regionname:
if (isMetaRegion((byte[]) inv.getParameters()[0])) {
// LOG.debug("High priority with method: " + methodName +
// " and region: "
// + Bytes.toString((byte[]) inv.getParameters()[0]));
return HIGH_QOS;
}
} else if (inv.getParameterClasses()[0] == MultiAction.class) {
MultiAction ma = (MultiAction) inv.getParameters()[0];
Set<byte[]> regions = ma.getRegions();
// ok this sucks, but if any single one of the actions touches a meta,
// the whole thing gets tagged high priority. This is a dangerous hack
// because people can get their multi action tagged high QOS by tossing
// in a Get(.META.) AND this regionserver hosts META/-ROOT-
for (byte[] region : regions) {
if (isMetaRegion(region)) {
// LOG.debug("High priority multi with region: " +
// Bytes.toString(region));
return HIGH_QOS; // short circuit for the win.
}
}
}
//LOG.debug("Low priority: " + from.toString());
// LOG.debug("Low priority: " + from.toString());
return NORMAL_QOS;
}
}
@ -1973,17 +1976,21 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
@Override
public boolean closeRegion(HRegionInfo region)
throws NotServingRegionException {
return closeRegion(region, true);
}
@Override
public boolean closeRegion(HRegionInfo region, final boolean zk)
throws NotServingRegionException {
LOG.info("Received close region: " + region.getRegionNameAsString());
// TODO: Need to check if this is being served here but currently undergoing
// a split (so master needs to retry close after split is complete)
if (!onlineRegions.containsKey(region.getEncodedName())) {
LOG.warn("Received close for region we are not serving; " +
region.getEncodedName());
throw new NotServingRegionException("Received close for "
+ region.getRegionNameAsString() + " but we are not serving it");
}
return closeRegion(region, false, true);
return closeRegion(region, false, zk);
}
/**
@ -2066,14 +2073,14 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
}
@Override
public NavigableSet<HRegionInfo> getOnlineRegions() {
NavigableSet<HRegionInfo> sortedset = new TreeSet<HRegionInfo>();
public List<HRegionInfo> getOnlineRegions() {
List<HRegionInfo> list = new ArrayList<HRegionInfo>();
synchronized(this.onlineRegions) {
for (Map.Entry<String,HRegion> e: this.onlineRegions.entrySet()) {
sortedset.add(e.getValue().getRegionInfo());
list.add(e.getValue().getRegionInfo());
}
}
return sortedset;
return list;
}
public int getNumberOfOnlineRegions() {

View File

@ -85,6 +85,10 @@ public class MiniHBaseCluster {
init(numMasters, numRegionServers);
}
/**
 * @return the configuration held in this cluster's <code>conf</code> field
 */
public Configuration getConfiguration() {
  return conf;
}
/**
* Override Master so can add inject behaviors testing.
*/

View File

@ -20,6 +20,7 @@
package org.apache.hadoop.hbase.client;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@ -45,8 +46,10 @@ import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
@ -81,6 +84,15 @@ public class TestAdmin {
this.admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
}
/**
 * Builds an HBaseFsck against the mini cluster's configuration, enables the
 * full report and asserts that <code>doWork()</code> returns 0.
 */
@Test
public void testHBaseFsck() throws IOException {
  HBaseFsck checker = new HBaseFsck(
    TEST_UTIL.getMiniHBaseCluster().getConfiguration());
  checker.displayFullReport();
  final int outcome = checker.doWork();
  assertEquals(0, outcome);
}
@Test
public void testCreateTable() throws IOException {
HTableDescriptor [] tables = admin.listTables();