HBASE-670 Historian deadlocks if regionserver is at global memory boundary and is hosting .META.; version 2

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@664280 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-06-07 04:45:41 +00:00
parent 673defd74c
commit 4400995ff8
3 changed files with 65 additions and 40 deletions

View File

@ -180,11 +180,16 @@ public class RegionHistorian implements HConstants {
* Method to add a compaction event to the row in the .META table
* @param info
*/
public void addRegionCompaction(HRegionInfo info,
@SuppressWarnings("unused") String timeTaken) {
// Disabled. Noop. If this regionserver is hosting the .META. AND is
// holding the reclaimMemcacheMemory global lock, we deadlock. For now,
// just disable logging of flushes and compactions.
public void addRegionCompaction(final HRegionInfo info,
final String timeTaken) {
// While historian can not log flushes because it could deadlock the
// regionserver -- see the note in addRegionFlush -- there should be no
// such danger compacting; compactions are not allowed when
// Flusher#flushSomeRegions is run.
if (LOG.isDebugEnabled()) {
add(HistorianColumnKey.REGION_COMPACTION.key,
"Region compaction completed in " + timeTaken, info);
}
}
/**
@ -194,8 +199,9 @@ public class RegionHistorian implements HConstants {
public void addRegionFlush(HRegionInfo info,
@SuppressWarnings("unused") String timeTaken) {
// Disabled. Noop. If this regionserver is hosting the .META. AND is
// holding the reclaimMemcacheMemory global lock, we deadlock. For now,
// just disable logging of flushes and compactions.
// holding the reclaimMemcacheMemory global lock --
// see Flusher#flushSomeRegions -- we deadlock. For now, just disable
// logging of flushes.
}
/**

View File

@ -53,7 +53,6 @@ class Flusher extends Thread implements FlushRequester {
private final long optionalFlushPeriod;
private final HRegionServer server;
private final ReentrantLock lock = new ReentrantLock();
private final Integer memcacheSizeLock = new Integer(0);
private long lastOptionalCheck = System.currentTimeMillis();
protected final long globalMemcacheLimit;
@ -126,23 +125,30 @@ class Flusher extends Thread implements FlushRequester {
}
}
/**
* Flush a region right away, while respecting concurrency with the async
* flushing that is always going on.
/*
* Flush a region.
*
* @param region the region to be flushed
* @param removeFromQueue true if the region needs to be removed from the
* flush queue. False if called from the main run loop and true if called from
* flushSomeRegions to relieve memory pressure from the region server.
* @param removeFromQueue True if the region needs to be removed from the
* flush queue. False if called from the main flusher run loop and true if
* called from flushSomeRegions to relieve memory pressure from the region
* server. If <code>true</code>, we are in a state of emergency; we are not
* taking on updates regionserver-wide, not until memory is flushed. In this
* case, do not let a compaction run inline with blocked updates. Compactions
* can take a long time. Stopping compactions, there is a danger that number
* of flushes will overwhelm compaction on a busy server; we'll have to see.
* That compactions do not run when called out of flushSomeRegions means that
* compactions can be reported by the historian without danger of deadlock
* (HBASE-670).
*
* <p>In the main run loop, regions have already been removed from the flush
* queue, and if this method is called for the relief of memory pressure,
* this may not be necessarily true. We want to avoid trying to remove
* region from the queue because if it has already been removed, it reqires a
* region from the queue because if it has already been removed, it requires a
* sequential scan of the queue to determine that it is not in the queue.
*
* <p>If called from flushSomeRegions, the region may be in the queue but
* it may have been determined that the region had a significant amout of
* it may have been determined that the region had a significant amount of
* memory in use and needed to be flushed to relieve memory pressure. In this
* case, its flush may preempt the pending request in the queue, and if so,
* it needs to be removed from the queue to avoid flushing the region multiple
@ -163,7 +169,9 @@ class Flusher extends Thread implements FlushRequester {
}
lock.lock();
try {
if (region.flushcache()) {
// See javadoc comment above for removeFromQueue on why we do not
// compact if removeFromQueue is true.
if (region.flushcache() && !removeFromQueue) {
server.compactSplitThread.compactionRequested(region);
}
} catch (DroppedSnapshotException ex) {
@ -242,38 +250,26 @@ class Flusher extends Thread implements FlushRequester {
* amount of memcache consumption.
*/
public void reclaimMemcacheMemory() {
synchronized (memcacheSizeLock) {
if (server.getGlobalMemcacheSize() >= globalMemcacheLimit) {
flushSomeRegions();
}
if (server.getGlobalMemcacheSize() >= globalMemcacheLimit) {
flushSomeRegions();
}
}
private void flushSomeRegions() {
// we'll sort the regions in reverse
SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
new Comparator<Long>() {
public int compare(Long a, Long b) {
return -1 * a.compareTo(b);
}
}
);
// copy over all the regions
for (HRegion region : server.onlineRegions.values()) {
sortedRegions.put(region.memcacheSize.get(), region);
}
/*
* Emergency! Need to flush memory. While running this method all updates
* to this regionserver are blocked.
*/
private synchronized void flushSomeRegions() {
SortedMap<Long, HRegion> m =
this.server.getCopyOfOnlineRegionsSortedBySize();
// keep flushing until we hit the low water mark
while (server.getGlobalMemcacheSize() >= globalMemcacheLimitLowMark) {
// flush the region with the biggest memcache
HRegion biggestMemcacheRegion =
sortedRegions.remove(sortedRegions.firstKey());
HRegion biggestMemcacheRegion = m.remove(m.firstKey());
if (!flushRegion(biggestMemcacheRegion, true)) {
// Something bad happened - give up.
break;
}
}
}
}

View File

@ -28,6 +28,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -35,6 +36,7 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue;
@ -1305,6 +1307,27 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public Collection<HRegion> getOnlineRegions() {
return Collections.unmodifiableCollection(onlineRegions.values());
}
/**
* @return A new Map of online regions sorted by region size with the first
* entry being the biggest.
*/
public SortedMap<Long, HRegion> getCopyOfOnlineRegionsSortedBySize() {
// we'll sort the regions in reverse
SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
new Comparator<Long>() {
public int compare(Long a, Long b) {
return -1 * a.compareTo(b);
}
});
// Copy over all regions. Regions are sorted by size with biggest first.
synchronized (this.onlineRegions) {
for (HRegion region : this.onlineRegions.values()) {
sortedRegions.put(region.memcacheSize.get(), region);
}
}
return sortedRegions;
}
/**
* @param regionName