HBASE-927 We don't recover if HRS hosting -ROOT-/.META. goes down

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@722704 13f79535-47bb-0310-9956-ffa450edef68
Jim Kellerman 2008-12-03 00:47:59 +00:00
parent d479c750de
commit 78165487d9
13 changed files with 195 additions and 149 deletions

@@ -89,6 +89,7 @@ Release 0.19.0 - Unreleased
HBASE-1037 Some test cases failing on Windows/Cygwin but not UNIX/Linux
HBASE-1041 Migration throwing NPE
HBASE-1042 OOME but we don't abort
HBASE-927 We don't recover if HRS hosting -ROOT-/.META. goes down
IMPROVEMENTS
HBASE-901 Add a limit to key length, check key and value length on client side

@@ -23,12 +23,7 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
@@ -36,7 +31,7 @@ import org.apache.hadoop.io.WritableComparable;
/**
* This class encapsulates metrics for determining the load on an HRegionServer
*/
@SuppressWarnings("unused")
@SuppressWarnings("unchecked")
public class HServerLoad implements WritableComparable {
/** number of regions */
// could just use regionLoad.size() but master.RegionManager likes to play
@@ -138,7 +133,7 @@ public class HServerLoad implements WritableComparable {
}
/**
* @param storefiles the number of stores
* @param stores the number of stores
*/
public void setStores(int stores) {
this.stores = stores;
@@ -303,10 +298,6 @@ public class HServerLoad implements WritableComparable {
return numberOfRegions;
}
public Collection<RegionLoad> getRegionLoad() {
return Collections.unmodifiableCollection(regionLoad);
}
/**
* @return the numberOfRequests
*/
@@ -383,13 +374,14 @@
}
/**
- * @param name
- * @param stores
- * @param storefiles
- * @param memcacheSizeMB
- * @param storefileIndexSizeMB
* @param name
* @param stores
* @param storefiles
* @param memcacheSizeMB
* @param storefileIndexSizeMB
* @deprecated Use {@link #addRegionInfo(RegionLoad)}
*/
@Deprecated
public void addRegionInfo(final byte[] name, final int stores,
final int storefiles, final int memcacheSizeMB,
final int storefileIndexSizeMB) {

@@ -22,9 +22,7 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.fs.FileSystem; //TODO: remove
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.regionserver.HStoreFile; //TODO: remove
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.ipc.HRegionInterface;

@@ -125,7 +125,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
final int metaRescanInterval;
// A Sleeper that sleeps for threadWakeFrequency
protected final Sleeper sleeper;
private final Sleeper sleeper;
// Default access so accessible from unit tests. MASTER is name of the webapp
// and the attribute name used stuffing this instance into web context.
@@ -590,27 +590,14 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
if (!isMasterRunning()) {
throw new MasterNotRunningException();
}
HRegionInfo newRegion = new HRegionInfo(desc, null, null);
for (int tries = 0; tries < numRetries; tries++) {
try {
// We can not access meta regions if they have not already been
// assigned and scanned. If we timeout waiting, just shutdown.
if (regionManager.waitForMetaRegionsOrClose()) {
break;
}
createTable(newRegion);
LOG.info("created table " + desc.getNameAsString());
break;
} catch (TableExistsException e) {
throw e;
} catch (IOException e) {
if (tries == numRetries - 1) {
throw RemoteExceptionHandler.checkIOException(e);
}
sleeper.sleep();
}
// We can not create a table unless meta regions have already been
// assigned and scanned.
if (!regionManager.areAllMetaRegionsOnline()) {
throw new NotAllMetaRegionsOnlineException();
}
HRegionInfo newRegion = new HRegionInfo(desc, null, null);
createTable(newRegion);
LOG.info("created table " + desc.getNameAsString());
}
private synchronized void createTable(final HRegionInfo newRegion)
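
Note: because NotAllMetaRegionsOnlineException extends DoNotRetryIOException (see the new class later in this diff), the retry loop that used to live here moves out of the master entirely; a client that wants the old blocking behavior now has to loop itself. A minimal caller-side sketch (the wait loop and timing are illustrative assumptions, not part of this patch; InterruptedException handling elided):

HBaseAdmin admin = new HBaseAdmin(conf);
boolean created = false;
while (!created) {
  try {
    admin.createTable(desc);
    created = true;  // meta was online and the table was created
  } catch (NotAllMetaRegionsOnlineException e) {
    Thread.sleep(1000);  // meta regions not assigned yet; back off and retry
  }
}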

@@ -61,7 +61,7 @@ class MetaScanner extends BaseScanner {
private boolean scanOneMetaRegion(MetaRegion region) {
while (!master.closed.get() && !regionManager.isInitialRootScanComplete() &&
regionManager.getRootRegionLocation() == null) {
master.sleeper.sleep();
sleep();
}
if (master.closed.get()) {
return false;

@@ -19,7 +19,6 @@
*/
package org.apache.hadoop.hbase.master;
import java.util.Map; //TODO: remove
import java.io.IOException;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.ipc.HRegionInterface;

@@ -0,0 +1,44 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.DoNotRetryIOException;
/**
* Thrown when an operation requires the root and all meta regions to be online
*/
@SuppressWarnings("serial")
public class NotAllMetaRegionsOnlineException extends DoNotRetryIOException {
/**
* default constructor
*/
public NotAllMetaRegionsOnlineException() {
super();
}
/**
* @param message
*/
public NotAllMetaRegionsOnlineException(String message) {
super(message);
}
}

@@ -47,6 +47,7 @@ class ProcessServerShutdown extends RegionServerOperation {
private final HServerAddress deadServer;
private final String deadServerName;
private final boolean rootRegionServer;
private boolean rootRegionReassigned = false;
private Path oldLogDir;
private boolean logSplit;
private boolean rootRescanned;
@@ -250,14 +251,19 @@ class ProcessServerShutdown extends RegionServerOperation {
}
logSplit = true;
}
if (this.rootRegionServer && !this.rootRegionReassigned) {
// The server that died was serving the root region. Now that the log
// has been split, get it reassigned.
master.regionManager.reassignRootRegion();
// avoid multiple root region reassignment
this.rootRegionReassigned = true;
// When we call rootAvailable below, it will put us on the delayed
// to do queue to allow some time to pass during which the root
// region will hopefully get reassigned.
}
if (!rootAvailable()) {
if (rootRegionServer) {
// Get root region assigned now that log has been split and if the
// dead server was serving the root region
master.regionManager.reassignRootRegion();
}
// Return true so that worker does not put this request back on the
// toDoQueue.
// rootAvailable() has already put it on the delayedToDoQueue
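
For context, rootAvailable() is the gate that performs the delayed requeue. In sketch form (an assumption about the helper's shape in RegionServerOperation, not a quote of it):

protected boolean rootAvailable() {
  if (master.getRootRegionLocation() == null) {
    // Root is not assigned yet: park this operation on the master's
    // delayed queue so the reassignment requested above has time to run.
    requeue();  // assumed helper that feeds the delayedToDoQueue
    return false;
  }
  return true;
}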

@@ -59,7 +59,7 @@ import org.apache.hadoop.hbase.util.Writables;
class RegionManager implements HConstants {
protected static final Log LOG = LogFactory.getLog(RegionManager.class);
private volatile AtomicReference<HServerAddress> rootRegionLocation =
private AtomicReference<HServerAddress> rootRegionLocation =
new AtomicReference<HServerAddress>(null);
private volatile boolean safeMode = true;
@@ -165,7 +165,7 @@ class RegionManager implements HConstants {
}
void reassignRootRegion() {
rootRegionLocation.set(null);
unsetRootRegion();
if (!master.shutdownRequested) {
unassignedRegions.put(HRegionInfo.ROOT_REGIONINFO, ZERO_L);
}
@@ -500,8 +500,13 @@ class RegionManager implements HConstants {
* Block until meta regions are online or we're shutting down.
* @return true if we found meta regions, false if we're closing.
*/
public boolean waitForMetaRegionsOrClose() {
return metaScannerThread.waitForMetaRegionsOrClose();
public boolean areAllMetaRegionsOnline() {
boolean result = false;
if (rootRegionLocation.get() != null &&
numberOfMetaRegions.get() == onlineMetaRegions.size()) {
result = true;
}
return result;
}
/**
@@ -530,16 +535,25 @@ class RegionManager implements HConstants {
* Get a set of all the meta regions that contain info about a given table.
* @param tableName Table you need to know all the meta regions for
* @return set of MetaRegion objects that contain the table
* @throws NotAllMetaRegionsOnlineException
*/
public Set<MetaRegion> getMetaRegionsForTable(byte [] tableName) {
public Set<MetaRegion> getMetaRegionsForTable(byte [] tableName)
throws NotAllMetaRegionsOnlineException {
byte [] firstMetaRegion = null;
Set<MetaRegion> metaRegions = new HashSet<MetaRegion>();
if (Bytes.equals(tableName, HConstants.META_TABLE_NAME)) {
if (rootRegionLocation.get() == null) {
throw new NotAllMetaRegionsOnlineException(
Bytes.toString(HConstants.ROOT_TABLE_NAME));
}
metaRegions.add(new MetaRegion(rootRegionLocation.get(),
HRegionInfo.ROOT_REGIONINFO.getRegionName()));
} else {
if (!areAllMetaRegionsOnline()) {
throw new NotAllMetaRegionsOnlineException();
}
synchronized (onlineMetaRegions) {
if (onlineMetaRegions.size() == 1) {
firstMetaRegion = onlineMetaRegions.firstKey();
@@ -599,9 +613,9 @@
* @return list of MetaRegion objects
*/
public List<MetaRegion> getListOfOnlineMetaRegions() {
List<MetaRegion> regions = new ArrayList<MetaRegion>();
List<MetaRegion> regions = null;
synchronized(onlineMetaRegions) {
regions.addAll(onlineMetaRegions.values());
regions = new ArrayList<MetaRegion>(onlineMetaRegions.values());
}
return regions;
}
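
Taking the copy while holding the onlineMetaRegions monitor is the point of this cleanup: the copy constructor iterates the map's values, and that iteration must not race with concurrent inserts. The idiom in isolation (names reused from the code above for illustration):

List<MetaRegion> snapshot;
synchronized (onlineMetaRegions) {
  // Copy under the monitor so concurrent updates cannot corrupt the
  // iteration performed by the copy constructor.
  snapshot = new ArrayList<MetaRegion>(onlineMetaRegions.values());
}
// The snapshot is private to the caller and safe to use without the lock.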

@@ -69,12 +69,6 @@ abstract class RegionServerOperation implements Delayed, HConstants {
protected boolean metaTableAvailable() {
boolean available = true;
if (LOG.isDebugEnabled()) {
LOG.debug("numberOfMetaRegions: " +
master.regionManager.numMetaRegions() +
", onlineMetaRegions.size(): " +
master.regionManager.numOnlineMetaRegions());
}
if (master.regionManager.numMetaRegions() !=
master.regionManager.numOnlineMetaRegions()) {
// We can't proceed because not all of the meta regions are online.
@@ -83,6 +77,10 @@ abstract class RegionServerOperation implements Delayed, HConstants {
// in the run queue, put this request on the delay queue to give
// other threads the opportunity to get the meta regions on-line.
if (LOG.isDebugEnabled()) {
LOG.debug("numberOfMetaRegions: " +
master.regionManager.numMetaRegions() +
", onlineMetaRegions.size(): " +
master.regionManager.numOnlineMetaRegions());
LOG.debug("Requeuing because not all meta regions are online");
}
available = false;

@@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.util.Sleeper;
/**
* Uses Callable pattern so that operations against meta regions do not need
@@ -39,6 +40,7 @@ import org.apache.hadoop.hbase.ipc.HRegionInterface;
*/
abstract class RetryableMetaOperation<T> implements Callable<T> {
protected final Log LOG = LogFactory.getLog(this.getClass());
protected final Sleeper sleeper;
protected final MetaRegion m;
protected final HMaster master;
@@ -47,6 +49,7 @@ abstract class RetryableMetaOperation<T> implements Callable<T> {
protected RetryableMetaOperation(MetaRegion m, HMaster master) {
this.m = m;
this.master = master;
this.sleeper = new Sleeper(master.threadWakeFrequency, master.closed);
}
protected T doWithRetries()
@@ -89,7 +92,7 @@ abstract class RetryableMetaOperation<T> implements Callable<T> {
} catch (Exception e) {
throw new RuntimeException(e);
}
master.sleeper.sleep();
sleeper.sleep();
}
return null;
}
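
Giving each operation its own Sleeper, built from master.threadWakeFrequency and the master.closed flag as in the constructor above, keeps the retry pause shutdown-aware without sharing the master's sleep cycle. Roughly the retry shape that results (a sketch; doCall() stands in for the real unit of work):

Sleeper sleeper = new Sleeper(master.threadWakeFrequency, master.closed);
for (int tries = 0; tries < numRetries; tries++) {
  try {
    return doCall();                       // hypothetical attempt
  } catch (IOException e) {
    if (tries == numRetries - 1) throw e;  // out of retries: surface it
  }
  sleeper.sleep();  // waits ~threadWakeFrequency ms, returns early on close
}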

@@ -127,7 +127,7 @@ class ServerManager implements HConstants {
HServerAddress root = master.getRootRegionLocation();
boolean rootServer = false;
if (root != null && root.equals(storedInfo.getServerAddress())) {
master.regionManager.setRootRegionLocation(null);
master.regionManager.unsetRootRegion();
rootServer = true;
}
try {
@@ -293,17 +293,19 @@ class ServerManager implements HConstants {
serversToServerInfo.put(serverName, serverInfo);
HServerLoad load = serversToLoad.get(serverName);
this.master.getMetrics().incrementRequests(load.getNumberOfRequests());
if (load != null && !load.equals(serverInfo.getLoad())) {
// We have previous information about the load on this server
// and the load on this server has changed
synchronized (loadToServers) {
Set<String> servers = loadToServers.get(load);
if (load != null) {
this.master.getMetrics().incrementRequests(load.getNumberOfRequests());
if (!load.equals(serverInfo.getLoad())) {
// We have previous information about the load on this server
// and the load on this server has changed
synchronized (loadToServers) {
Set<String> servers = loadToServers.get(load);
// Note that servers should never be null because loadToServers
// and serversToLoad are manipulated in pairs
servers.remove(serverName);
loadToServers.put(load, servers);
// Note that servers should never be null because loadToServers
// and serversToLoad are manipulated in pairs
servers.remove(serverName);
loadToServers.put(load, servers);
}
}
}
@@ -715,19 +717,16 @@
}
}
deadServers.add(server);
}
synchronized (serversToServerInfo) {
serversToServerInfo.notifyAll();
}
if (info != null) {
try {
master.toDoQueue.put(
new ProcessServerShutdown(master, info, rootServer));
} catch (InterruptedException e) {
LOG.error("Insertion into toDoQueue was interrupted", e);
LOG.error("insert into toDoQueue was interrupted", e);
}
}
synchronized (serversToServerInfo) {
serversToServerInfo.notifyAll();
}
}
}

@@ -699,85 +699,90 @@ public class HLog implements HConstants, Syncable {
LOG.debug("Splitting " + (i + 1) + " of " + logfiles.length + ": " +
logfiles[i].getPath());
}
// Check for empty file.
if (logfiles[i].getLen() <= 0) {
LOG.info("Skipping " + logfiles[i].toString() +
" because zero length");
continue;
}
// Check for possibly empty file. With appends, currently Hadoop reports
// a zero length even if the file has been sync'd. Revisit if
// HADOOP-4751 is committed.
boolean possiblyEmpty = logfiles[i].getLen() <= 0;
HLogKey key = new HLogKey();
HLogEdit val = new HLogEdit();
SequenceFile.Reader in =
new SequenceFile.Reader(fs, logfiles[i].getPath(), conf);
try {
int count = 0;
for (; in.next(key, val); count++) {
byte [] tableName = key.getTablename();
byte [] regionName = key.getRegionName();
SequenceFile.Writer w = logWriters.get(regionName);
if (w == null) {
Path logfile = new Path(
HRegion.getRegionDir(
HTableDescriptor.getTableDir(rootDir, tableName),
HRegionInfo.encodeRegionName(regionName)),
HREGION_OLDLOGFILE_NAME);
Path oldlogfile = null;
SequenceFile.Reader old = null;
if (fs.exists(logfile)) {
LOG.warn("Old log file " + logfile +
" already exists. Copying existing file to new file");
oldlogfile = new Path(logfile.toString() + ".old");
fs.rename(logfile, oldlogfile);
old = new SequenceFile.Reader(fs, oldlogfile, conf);
}
w = SequenceFile.createWriter(fs, conf, logfile, HLogKey.class,
HLogEdit.class, getCompressionType(conf));
// Use copy of regionName; the regionName object is reused inside
// HStoreKey.getRegionName, so its content changes as we iterate.
logWriters.put(regionName, w);
if (LOG.isDebugEnabled()) {
LOG.debug("Creating new log file writer for path " + logfile +
" and region " + Bytes.toString(regionName));
}
if (old != null) {
// Copy from existing log file
HLogKey oldkey = new HLogKey();
HLogEdit oldval = new HLogEdit();
for (; old.next(oldkey, oldval); count++) {
if (LOG.isDebugEnabled() && count > 0 && count % 10000 == 0) {
LOG.debug("Copied " + count + " edits");
}
w.append(oldkey, oldval);
SequenceFile.Reader in =
new SequenceFile.Reader(fs, logfiles[i].getPath(), conf);
try {
int count = 0;
for (; in.next(key, val); count++) {
byte [] tableName = key.getTablename();
byte [] regionName = key.getRegionName();
SequenceFile.Writer w = logWriters.get(regionName);
if (w == null) {
Path logfile = new Path(
HRegion.getRegionDir(
HTableDescriptor.getTableDir(rootDir, tableName),
HRegionInfo.encodeRegionName(regionName)),
HREGION_OLDLOGFILE_NAME);
Path oldlogfile = null;
SequenceFile.Reader old = null;
if (fs.exists(logfile)) {
LOG.warn("Old log file " + logfile +
" already exists. Copying existing file to new file");
oldlogfile = new Path(logfile.toString() + ".old");
fs.rename(logfile, oldlogfile);
old = new SequenceFile.Reader(fs, oldlogfile, conf);
}
w = SequenceFile.createWriter(fs, conf, logfile, HLogKey.class,
HLogEdit.class, getCompressionType(conf));
// Use copy of regionName; the regionName object is reused inside
// HStoreKey.getRegionName, so its content changes as we iterate.
logWriters.put(regionName, w);
if (LOG.isDebugEnabled()) {
LOG.debug("Creating new log file writer for path " + logfile +
" and region " + Bytes.toString(regionName));
}
if (old != null) {
// Copy from existing log file
HLogKey oldkey = new HLogKey();
HLogEdit oldval = new HLogEdit();
for (; old.next(oldkey, oldval); count++) {
if (LOG.isDebugEnabled() && count > 0 && count % 10000 == 0) {
LOG.debug("Copied " + count + " edits");
}
w.append(oldkey, oldval);
}
old.close();
fs.delete(oldlogfile, true);
}
old.close();
fs.delete(oldlogfile, true);
}
w.append(key, val);
}
w.append(key, val);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Applied " + count + " total edits from " +
logfiles[i].getPath().toString());
if (LOG.isDebugEnabled()) {
LOG.debug("Applied " + count + " total edits from " +
logfiles[i].getPath().toString());
}
} catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e);
if (!(e instanceof EOFException)) {
LOG.warn("Exception processing " + logfiles[i].getPath() +
" -- continuing. Possible DATA LOSS!", e);
}
} finally {
try {
in.close();
} catch (IOException e) {
LOG.warn("Close in finally threw exception -- continuing", e);
}
// Delete the input file now so we do not replay edits. We could
// have gotten here because of an exception. If so, probably
// nothing we can do about it. Replaying it, it could work but we
// could be stuck replaying for ever. Just continue though we
// could have lost some edits.
fs.delete(logfiles[i].getPath(), true);
}
} catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e);
if (!(e instanceof EOFException)) {
LOG.warn("Exception processing " + logfiles[i].getPath() +
" -- continuing. Possible DATA LOSS!", e);
if (possiblyEmpty) {
continue;
}
} finally {
try {
in.close();
} catch (IOException e) {
LOG.warn("Close in finally threw exception -- continuing", e);
}
// Delete the input file now so we do not replay edits. We could
// have gotten here because of an exception. If so, probably
// nothing we can do about it. Replaying it, it could work but we
// could be stuck replaying for ever. Just continue though we
// could have lost some edits.
fs.delete(logfiles[i].getPath(), true);
throw e;
}
}
} finally {
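
Stripped of the per-region writer plumbing, the restructured loop above reduces to this skeleton (a sketch of the new control flow, not the literal patch):

for (int i = 0; i < logfiles.length; i++) {
  boolean possiblyEmpty = logfiles[i].getLen() <= 0;
  try {
    SequenceFile.Reader in =
      new SequenceFile.Reader(fs, logfiles[i].getPath(), conf);
    try {
      // ... replay each edit into the appropriate per-region writer ...
    } catch (IOException e) {
      // Mid-file failure: warn about possible data loss, keep splitting.
    } finally {
      in.close();
      fs.delete(logfiles[i].getPath(), true);  // never replay a log twice
    }
  } catch (IOException e) {
    if (possiblyEmpty) {
      continue;  // file reported zero length and really was empty: skip it
    }
    throw e;     // real failure opening a non-empty log aborts the split
  }
}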