HBASE-3291 If split happens while regionserver is going down, we can stick open

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1041217 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-12-02 00:00:30 +00:00
parent 8f35221a02
commit 1955d5dfe4
3 changed files with 92 additions and 41 deletions

View File

@ -732,6 +732,7 @@ Release 0.90.0 - Unreleased
HBASE-3294 WARN org.apache.hadoop.hbase.regionserver.Store: Not in set HBASE-3294 WARN org.apache.hadoop.hbase.regionserver.Store: Not in set
(double-remove?) org.apache.hadoop.hbase.regionserver.StoreScanner@76607d3d (double-remove?) org.apache.hadoop.hbase.regionserver.StoreScanner@76607d3d
HBASE-3299 If failed open, we don't output the IOE HBASE-3299 If failed open, we don't output the IOE
HBASE-3291 If split happens while regionserver is going down, we can stick open.
IMPROVEMENTS IMPROVEMENTS

View File

@ -30,6 +30,7 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
@ -170,10 +171,10 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
/** /**
* Map of regions currently being served by this region server. Key is the * Map of regions currently being served by this region server. Key is the
* encoded region name. * encoded region name. All access should be synchronized.
*/ */
protected final Map<String, HRegion> onlineRegions = protected final Map<String, HRegion> onlineRegions =
new ConcurrentHashMap<String, HRegion>(); new HashMap<String, HRegion>();
protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
private final LinkedBlockingQueue<HMsg> outboundMsgs = new LinkedBlockingQueue<HMsg>(); private final LinkedBlockingQueue<HMsg> outboundMsgs = new LinkedBlockingQueue<HMsg>();
@ -546,11 +547,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
// The main run loop. // The main run loop.
for (int tries = 0; !this.stopped && isHealthy();) { for (int tries = 0; !this.stopped && isHealthy();) {
if (!isClusterUp()) { if (!isClusterUp()) {
if (this.onlineRegions.isEmpty()) { if (isOnlineRegionsEmpty()) {
stop("Exiting; cluster shutdown set and not carrying any regions"); stop("Exiting; cluster shutdown set and not carrying any regions");
} else if (!this.stopping) { } else if (!this.stopping) {
closeUserRegions(this.abortRequested);
this.stopping = true; this.stopping = true;
closeUserRegions(this.abortRequested);
} }
} }
long now = System.currentTimeMillis(); long now = System.currentTimeMillis();
@ -660,18 +661,20 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
private void waitOnAllRegionsToClose() { private void waitOnAllRegionsToClose() {
// Wait till all regions are closed before going out. // Wait till all regions are closed before going out.
int lastCount = -1; int lastCount = -1;
while (!this.onlineRegions.isEmpty()) { while (!isOnlineRegionsEmpty()) {
int count = this.onlineRegions.size(); int count = getNumberOfOnlineRegions();
// Only print a message if the count of regions has changed. // Only print a message if the count of regions has changed.
if (count != lastCount) { if (count != lastCount) {
lastCount = count; lastCount = count;
LOG.info("Waiting on " + count + " regions to close"); LOG.info("Waiting on " + count + " regions to close");
// Only print out regions still closing if a small number else will // Only print out regions still closing if a small number else will
// swamp the log. // swamp the log.
if (count < 10) { if (count < 10 && LOG.isDebugEnabled()) {
synchronized (this.onlineRegions) {
LOG.debug(this.onlineRegions); LOG.debug(this.onlineRegions);
} }
} }
}
Threads.sleep(1000); Threads.sleep(1000);
} }
} }
@ -721,9 +724,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
HServerLoad hsl = new HServerLoad(requestCount.get(), HServerLoad hsl = new HServerLoad(requestCount.get(),
(int)(memory.getUsed() / 1024 / 1024), (int)(memory.getUsed() / 1024 / 1024),
(int) (memory.getMax() / 1024 / 1024)); (int) (memory.getMax() / 1024 / 1024));
synchronized (this.onlineRegions) {
for (HRegion r : this.onlineRegions.values()) { for (HRegion r : this.onlineRegions.values()) {
hsl.addRegionInfo(createRegionLoad(r)); hsl.addRegionInfo(createRegionLoad(r));
} }
}
return hsl; return hsl;
} }
@ -884,7 +889,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
* @throws IOException * @throws IOException
*/ */
public HServerLoad.RegionLoad createRegionLoad(final String encodedRegionName) { public HServerLoad.RegionLoad createRegionLoad(final String encodedRegionName) {
return createRegionLoad(this.onlineRegions.get(encodedRegionName)); HRegion r = null;
synchronized (this.onlineRegions) {
r = this.onlineRegions.get(encodedRegionName);
}
return createRegionLoad(r);
} }
/* /*
@ -1000,6 +1009,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
@Override @Override
protected void chore() { protected void chore() {
synchronized (this.instance.onlineRegions) {
for (HRegion r : this.instance.onlineRegions.values()) { for (HRegion r : this.instance.onlineRegions.values()) {
try { try {
if (r != null && r.isMajorCompaction()) { if (r != null && r.isMajorCompaction()) {
@ -1013,6 +1023,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
} }
} }
} }
}
/** /**
* Report the status of the server. A server is online once all the startup is * Report the status of the server. A server is online once all the startup is
@ -1491,6 +1502,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
HRegion root = null; HRegion root = null;
this.lock.writeLock().lock(); this.lock.writeLock().lock();
try { try {
synchronized (this.onlineRegions) {
for (Map.Entry<String, HRegion> e: onlineRegions.entrySet()) { for (Map.Entry<String, HRegion> e: onlineRegions.entrySet()) {
HRegionInfo hri = e.getValue().getRegionInfo(); HRegionInfo hri = e.getValue().getRegionInfo();
if (hri.isRootRegion()) { if (hri.isRootRegion()) {
@ -1500,6 +1512,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
} }
if (meta != null && root != null) break; if (meta != null && root != null) break;
} }
}
} finally { } finally {
this.lock.writeLock().unlock(); this.lock.writeLock().unlock();
} }
@ -2056,12 +2069,15 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
public boolean closeRegion(HRegionInfo region, final boolean zk) public boolean closeRegion(HRegionInfo region, final boolean zk)
throws NotServingRegionException { throws NotServingRegionException {
LOG.info("Received close region: " + region.getRegionNameAsString()); LOG.info("Received close region: " + region.getRegionNameAsString());
if (!onlineRegions.containsKey(region.getEncodedName())) { synchronized (this.onlineRegions) {
boolean hasit = this.onlineRegions.containsKey(region.getEncodedName());
if (!hasit) {
LOG.warn("Received close for region we are not serving; " + LOG.warn("Received close for region we are not serving; " +
region.getEncodedName()); region.getEncodedName());
throw new NotServingRegionException("Received close for " throw new NotServingRegionException("Received close for "
+ region.getRegionNameAsString() + " but we are not serving it"); + region.getRegionNameAsString() + " but we are not serving it");
} }
}
return closeRegion(region, false, zk); return closeRegion(region, false, zk);
} }
@ -2162,7 +2178,17 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
} }
public int getNumberOfOnlineRegions() { public int getNumberOfOnlineRegions() {
return onlineRegions.size(); int size = -1;
synchronized (this.onlineRegions) {
size = this.onlineRegions.size();
}
return size;
}
boolean isOnlineRegionsEmpty() {
synchronized (this.onlineRegions) {
return this.onlineRegions.isEmpty();
}
} }
/** /**
@ -2172,14 +2198,19 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
* @see #getOnlineRegions() * @see #getOnlineRegions()
*/ */
public Collection<HRegion> getOnlineRegionsLocalContext() { public Collection<HRegion> getOnlineRegionsLocalContext() {
return Collections.unmodifiableCollection(this.onlineRegions.values()); synchronized (this.onlineRegions) {
Collection<HRegion> regions = this.onlineRegions.values();
return Collections.unmodifiableCollection(regions);
}
} }
@Override @Override
public void addToOnlineRegions(HRegion region) { public void addToOnlineRegions(HRegion region) {
lock.writeLock().lock(); lock.writeLock().lock();
try { try {
onlineRegions.put(region.getRegionInfo().getEncodedName(), region); synchronized (this.onlineRegions) {
this.onlineRegions.put(region.getRegionInfo().getEncodedName(), region);
}
} finally { } finally {
lock.writeLock().unlock(); lock.writeLock().unlock();
} }
@ -2190,7 +2221,9 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
this.lock.writeLock().lock(); this.lock.writeLock().lock();
HRegion toReturn = null; HRegion toReturn = null;
try { try {
toReturn = onlineRegions.remove(encodedName); synchronized (this.onlineRegions) {
toReturn = this.onlineRegions.remove(encodedName);
}
} finally { } finally {
this.lock.writeLock().unlock(); this.lock.writeLock().unlock();
} }
@ -2220,7 +2253,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
@Override @Override
public HRegion getFromOnlineRegions(final String encodedRegionName) { public HRegion getFromOnlineRegions(final String encodedRegionName) {
return onlineRegions.get(encodedRegionName); HRegion r = null;
synchronized (this.onlineRegions) {
r = this.onlineRegions.get(encodedRegionName);
}
return r;
} }
/** /**
@ -2313,7 +2350,9 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
// TODO: is this locking necessary? // TODO: is this locking necessary?
lock.readLock().lock(); lock.readLock().lock();
try { try {
synchronized (this.onlineRegions) {
regionsToCheck.addAll(this.onlineRegions.values()); regionsToCheck.addAll(this.onlineRegions.values());
}
} finally { } finally {
lock.readLock().unlock(); lock.readLock().unlock();
} }
@ -2446,13 +2485,15 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
} }
public HRegionInfo[] getRegionsAssignment() throws IOException { public HRegionInfo[] getRegionsAssignment() throws IOException {
HRegionInfo[] regions = new HRegionInfo[onlineRegions.size()]; synchronized (this.onlineRegions) {
HRegionInfo [] regions = new HRegionInfo[getNumberOfOnlineRegions()];
Iterator<HRegion> ite = onlineRegions.values().iterator(); Iterator<HRegion> ite = onlineRegions.values().iterator();
for (int i = 0; ite.hasNext(); i++) { for (int i = 0; ite.hasNext(); i++) {
regions[i] = ite.next().getRegionInfo(); regions[i] = ite.next().getRegionInfo();
} }
return regions; return regions;
} }
}
/** {@inheritDoc} */ /** {@inheritDoc} */
public HServerInfo getHServerInfo() throws IOException { public HServerInfo getHServerInfo() throws IOException {

View File

@ -301,6 +301,15 @@ public class SplitTransaction {
void openDaughterRegion(final Server server, void openDaughterRegion(final Server server,
final RegionServerServices services, final HRegion daughter) final RegionServerServices services, final HRegion daughter)
throws IOException, KeeperException { throws IOException, KeeperException {
if (server.isStopped() || services.isStopping()) {
MetaEditor.addDaughter(server.getCatalogTracker(),
daughter.getRegionInfo(), null);
LOG.info("Not opening daughter " +
daughter.getRegionInfo().getRegionNameAsString() +
" because stopping=" + services.isStopping() + ", stopped=" +
server.isStopped());
return;
}
HRegionInfo hri = daughter.getRegionInfo(); HRegionInfo hri = daughter.getRegionInfo();
LoggingProgressable reporter = LoggingProgressable reporter =
new LoggingProgressable(hri, server.getConfiguration()); new LoggingProgressable(hri, server.getConfiguration());