HBASE-670 Historian deadlocks if regionserver is at global memory boundary and is hosting .META.; version 2

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@664280 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-06-07 04:45:41 +00:00
parent 673defd74c
commit 4400995ff8
3 changed files with 65 additions and 40 deletions

View File

@ -180,11 +180,16 @@ public class RegionHistorian implements HConstants {
* Method to add a compaction event to the row in the .META table * Method to add a compaction event to the row in the .META table
* @param info * @param info
*/ */
public void addRegionCompaction(HRegionInfo info, public void addRegionCompaction(final HRegionInfo info,
@SuppressWarnings("unused") String timeTaken) { final String timeTaken) {
// Disabled. Noop. If this regionserver is hosting the .META. AND is // While historian can not log flushes because it could deadlock the
// holding the reclaimMemcacheMemory global lock, we deadlock. For now, // regionserver -- see the note in addRegionFlush -- there should be no
// just disable logging of flushes and compactions. // such danger compacting; compactions are not allowed when
// Flusher#flushSomeRegions is run.
if (LOG.isDebugEnabled()) {
add(HistorianColumnKey.REGION_COMPACTION.key,
"Region compaction completed in " + timeTaken, info);
}
} }
/** /**
@ -194,8 +199,9 @@ public class RegionHistorian implements HConstants {
public void addRegionFlush(HRegionInfo info, public void addRegionFlush(HRegionInfo info,
@SuppressWarnings("unused") String timeTaken) { @SuppressWarnings("unused") String timeTaken) {
// Disabled. Noop. If this regionserver is hosting the .META. AND is // Disabled. Noop. If this regionserver is hosting the .META. AND is
// holding the reclaimMemcacheMemory global lock, we deadlock. For now, // holding the reclaimMemcacheMemory global lock --
// just disable logging of flushes and compactions. // see Flusher#flushSomeRegions -- we deadlock. For now, just disable
// logging of flushes.
} }
/** /**

View File

@ -53,7 +53,6 @@ class Flusher extends Thread implements FlushRequester {
private final long optionalFlushPeriod; private final long optionalFlushPeriod;
private final HRegionServer server; private final HRegionServer server;
private final ReentrantLock lock = new ReentrantLock(); private final ReentrantLock lock = new ReentrantLock();
private final Integer memcacheSizeLock = new Integer(0);
private long lastOptionalCheck = System.currentTimeMillis(); private long lastOptionalCheck = System.currentTimeMillis();
protected final long globalMemcacheLimit; protected final long globalMemcacheLimit;
@ -126,23 +125,30 @@ class Flusher extends Thread implements FlushRequester {
} }
} }
/** /*
* Flush a region right away, while respecting concurrency with the async * Flush a region.
* flushing that is always going on.
* *
* @param region the region to be flushed * @param region the region to be flushed
* @param removeFromQueue true if the region needs to be removed from the * @param removeFromQueue True if the region needs to be removed from the
* flush queue. False if called from the main run loop and true if called from * flush queue. False if called from the main flusher run loop and true if
* flushSomeRegions to relieve memory pressure from the region server. * called from flushSomeRegions to relieve memory pressure from the region
* server. If <code>true</code>, we are in a state of emergency; we are not
* taking on updates regionserver-wide, not until memory is flushed. In this
* case, do not let a compaction run inline with blocked updates. Compactions
* can take a long time. The risk in skipping compactions here is that the
* number of flushes will overwhelm compaction on a busy server; we'll have to
* see.
* That compactions do not run when called out of flushSomeRegions means that
* compactions can be reported by the historian without danger of deadlock
* (HBASE-670).
* *
* <p>In the main run loop, regions have already been removed from the flush * <p>In the main run loop, regions have already been removed from the flush
* queue, and if this method is called for the relief of memory pressure, * queue, and if this method is called for the relief of memory pressure,
* this may not be necessarily true. We want to avoid trying to remove * this may not be necessarily true. We want to avoid trying to remove
* region from the queue because if it has already been removed, it reqires a * region from the queue because if it has already been removed, it requires a
* sequential scan of the queue to determine that it is not in the queue. * sequential scan of the queue to determine that it is not in the queue.
* *
* <p>If called from flushSomeRegions, the region may be in the queue but * <p>If called from flushSomeRegions, the region may be in the queue but
* it may have been determined that the region had a significant amout of * it may have been determined that the region had a significant amount of
* memory in use and needed to be flushed to relieve memory pressure. In this * memory in use and needed to be flushed to relieve memory pressure. In this
* case, its flush may preempt the pending request in the queue, and if so, * case, its flush may preempt the pending request in the queue, and if so,
* it needs to be removed from the queue to avoid flushing the region multiple * it needs to be removed from the queue to avoid flushing the region multiple
@ -163,7 +169,9 @@ class Flusher extends Thread implements FlushRequester {
} }
lock.lock(); lock.lock();
try { try {
if (region.flushcache()) { // See javadoc comment above for removeFromQueue on why we do not
// compact if removeFromQueue is true.
if (region.flushcache() && !removeFromQueue) {
server.compactSplitThread.compactionRequested(region); server.compactSplitThread.compactionRequested(region);
} }
} catch (DroppedSnapshotException ex) { } catch (DroppedSnapshotException ex) {
@ -242,38 +250,26 @@ class Flusher extends Thread implements FlushRequester {
* amount of memcache consumption. * amount of memcache consumption.
*/ */
public void reclaimMemcacheMemory() { public void reclaimMemcacheMemory() {
synchronized (memcacheSizeLock) { if (server.getGlobalMemcacheSize() >= globalMemcacheLimit) {
if (server.getGlobalMemcacheSize() >= globalMemcacheLimit) { flushSomeRegions();
flushSomeRegions();
}
} }
} }
private void flushSomeRegions() { /*
// we'll sort the regions in reverse * Emergency! Need to flush memory. While running this method all updates
SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>( * to this regionserver are blocked.
new Comparator<Long>() { */
public int compare(Long a, Long b) { private synchronized void flushSomeRegions() {
return -1 * a.compareTo(b); SortedMap<Long, HRegion> m =
} this.server.getCopyOfOnlineRegionsSortedBySize();
}
);
// copy over all the regions
for (HRegion region : server.onlineRegions.values()) {
sortedRegions.put(region.memcacheSize.get(), region);
}
// keep flushing until we hit the low water mark // keep flushing until we hit the low water mark
while (server.getGlobalMemcacheSize() >= globalMemcacheLimitLowMark) { while (server.getGlobalMemcacheSize() >= globalMemcacheLimitLowMark) {
// flush the region with the biggest memcache // flush the region with the biggest memcache
HRegion biggestMemcacheRegion = HRegion biggestMemcacheRegion = m.remove(m.firstKey());
sortedRegions.remove(sortedRegions.firstKey());
if (!flushRegion(biggestMemcacheRegion, true)) { if (!flushRegion(biggestMemcacheRegion, true)) {
// Something bad happened - give up. // Something bad happened - give up.
break; break;
} }
} }
} }
} }

View File

@ -28,6 +28,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
@ -35,6 +36,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
@ -1305,6 +1307,27 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public Collection<HRegion> getOnlineRegions() { public Collection<HRegion> getOnlineRegions() {
return Collections.unmodifiableCollection(onlineRegions.values()); return Collections.unmodifiableCollection(onlineRegions.values());
} }
/**
* Builds a fresh map of the currently online regions, keyed by the value each
* region's <code>memcacheSize</code> reports at the moment it is copied, and
* ordered so that the largest key comes first (<code>firstKey()</code> is the
* biggest memcache).
*
* <p>The copy is taken while holding the monitor on <code>onlineRegions</code>.
* The returned map is a new instance, so callers may modify it without
* touching the server's own region map. Keys are snapshots; they are not
* updated if a region's memcache size changes afterwards.
*
* <p>NOTE(review): the map is keyed by size alone, so if two regions report the
* same <code>memcacheSize</code> value the later <code>put</code> replaces the
* earlier one and only one of them appears in the result -- confirm callers
* can tolerate that.
*
* @return A new Map of online regions sorted by memcache size with the first
* entry being the biggest.
*/
public SortedMap<Long, HRegion> getCopyOfOnlineRegionsSortedBySize() {
// we'll sort the regions in reverse: negating the natural Long comparison
// puts the largest key first.
SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
new Comparator<Long>() {
public int compare(Long a, Long b) {
return -1 * a.compareTo(b);
}
});
// Copy over all regions. Regions are sorted by memcache size with biggest
// first. Hold the onlineRegions monitor for the duration of the iteration.
synchronized (this.onlineRegions) {
for (HRegion region : this.onlineRegions.values()) {
// Equal keys overwrite: see the NOTE(review) in the method Javadoc.
sortedRegions.put(region.memcacheSize.get(), region);
}
}
return sortedRegions;
}
/** /**
* @param regionName * @param regionName