MAPREDUCE-7277. IndexCache totalMemoryUsed differs from cache contents. Contributed by Jon Eagles (jeagles).
This commit is contained in:
parent
71063434a6
commit
568f185bf8
|
@ -22,17 +22,17 @@ import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
|
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
class IndexCache {
|
class IndexCache {
|
||||||
|
|
||||||
private final JobConf conf;
|
private final JobConf conf;
|
||||||
private final int totalMemoryAllowed;
|
private final int totalMemoryAllowed;
|
||||||
private AtomicInteger totalMemoryUsed = new AtomicInteger();
|
private AtomicInteger totalMemoryUsed = new AtomicInteger();
|
||||||
private static final Log LOG = LogFactory.getLog(IndexCache.class);
|
private static final Logger LOG = LoggerFactory.getLogger(IndexCache.class);
|
||||||
|
|
||||||
private final ConcurrentHashMap<String,IndexInformation> cache =
|
private final ConcurrentHashMap<String,IndexInformation> cache =
|
||||||
new ConcurrentHashMap<String,IndexInformation>();
|
new ConcurrentHashMap<String,IndexInformation>();
|
||||||
|
@ -72,11 +72,12 @@ class IndexCache {
|
||||||
try {
|
try {
|
||||||
info.wait();
|
info.wait();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
throw new IOException("Interrupted waiting for construction", e);
|
throw new IOException("Interrupted waiting for construction", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG.debug("IndexCache HIT: MapId " + mapId + " found");
|
LOG.debug("IndexCache HIT: MapId {} found", mapId);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.mapSpillRecord.size() == 0 ||
|
if (info.mapSpillRecord.size() == 0 ||
|
||||||
|
@ -106,63 +107,91 @@ class IndexCache {
|
||||||
try {
|
try {
|
||||||
info.wait();
|
info.wait();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
throw new IOException("Interrupted waiting for construction", e);
|
throw new IOException("Interrupted waiting for construction", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG.debug("IndexCache HIT: MapId " + mapId + " found");
|
LOG.debug("IndexCache HIT: MapId {} found", mapId);
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
LOG.debug("IndexCache MISS: MapId " + mapId + " not found") ;
|
LOG.debug("IndexCache MISS: MapId {} not found", mapId);
|
||||||
SpillRecord tmp = null;
|
SpillRecord tmp = null;
|
||||||
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
tmp = new SpillRecord(indexFileName, conf, expectedIndexOwner);
|
tmp = new SpillRecord(indexFileName, conf, expectedIndexOwner);
|
||||||
|
success = true;
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
tmp = new SpillRecord(0);
|
tmp = new SpillRecord(0);
|
||||||
cache.remove(mapId);
|
cache.remove(mapId);
|
||||||
|
if (e instanceof InterruptedException) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
throw new IOException("Error Reading IndexFile", e);
|
throw new IOException("Error Reading IndexFile", e);
|
||||||
} finally {
|
} finally {
|
||||||
synchronized (newInd) {
|
synchronized (newInd) {
|
||||||
newInd.mapSpillRecord = tmp;
|
newInd.mapSpillRecord = tmp;
|
||||||
|
if (success) {
|
||||||
|
// Only add mapId to the queue for successful read and after added to
|
||||||
|
// the cache. Once in the queue, it is now eligible for removal once
|
||||||
|
// construction is finished.
|
||||||
|
queue.add(mapId);
|
||||||
|
if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
|
||||||
|
freeIndexInformation();
|
||||||
|
}
|
||||||
|
}
|
||||||
newInd.notifyAll();
|
newInd.notifyAll();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
queue.add(mapId);
|
|
||||||
|
|
||||||
if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
|
|
||||||
freeIndexInformation();
|
|
||||||
}
|
|
||||||
return newInd;
|
return newInd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method removes the map from the cache if index information for this
|
* This method removes the map from the cache if it is present in the queue.
|
||||||
* map is loaded(size>0), index information entry in cache will not be
|
|
||||||
* removed if it is in the loading phrase(size=0), this prevents corruption
|
|
||||||
* of totalMemoryUsed. It should be called when a map output on this tracker
|
|
||||||
* is discarded.
|
|
||||||
* @param mapId The taskID of this map.
|
* @param mapId The taskID of this map.
|
||||||
*/
|
*/
|
||||||
public void removeMap(String mapId) {
|
public void removeMap(String mapId) throws IOException {
|
||||||
IndexInformation info = cache.get(mapId);
|
// Successfully removing the mapId from the queue enters into a contract
|
||||||
if (info == null || isUnderConstruction(info)) {
|
// that this thread will remove the corresponding mapId from the cache.
|
||||||
|
if (!queue.remove(mapId)) {
|
||||||
|
LOG.debug("Map ID {} not found in queue", mapId);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
info = cache.remove(mapId);
|
removeMapInternal(mapId);
|
||||||
if (info != null) {
|
}
|
||||||
totalMemoryUsed.addAndGet(-info.getSize());
|
|
||||||
if (!queue.remove(mapId)) {
|
/** This method should only be called upon successful removal of mapId from
|
||||||
LOG.warn("Map ID" + mapId + " not found in queue!!");
|
* the queue. The mapId will be removed from the cache and totalUsedMemory
|
||||||
|
* will be decremented.
|
||||||
|
* @param mapId the cache item to be removed
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private void removeMapInternal(String mapId) throws IOException {
|
||||||
|
IndexInformation info = cache.remove(mapId);
|
||||||
|
if (info == null) {
|
||||||
|
// Inconsistent state as presence in queue implies presence in cache
|
||||||
|
LOG.warn("Map ID " + mapId + " not found in cache");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
synchronized(info) {
|
||||||
|
while (isUnderConstruction(info)) {
|
||||||
|
info.wait();
|
||||||
|
}
|
||||||
|
totalMemoryUsed.getAndAdd(-info.getSize());
|
||||||
}
|
}
|
||||||
} else {
|
} catch (InterruptedException e) {
|
||||||
LOG.info("Map ID " + mapId + " not found in cache");
|
totalMemoryUsed.getAndAdd(-info.getSize());
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
throw new IOException("Interrupted waiting for construction", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method checks if cache and totolMemoryUsed is consistent.
|
* This method checks if cache and totalMemoryUsed is consistent.
|
||||||
* It is only used for unit test.
|
* It is only used for unit test.
|
||||||
* @return True if cache and totolMemoryUsed is consistent
|
* @return True if cache and totalMemoryUsed is consistent
|
||||||
*/
|
*/
|
||||||
boolean checkTotalMemoryUsed() {
|
boolean checkTotalMemoryUsed() {
|
||||||
int totalSize = 0;
|
int totalSize = 0;
|
||||||
|
@ -175,13 +204,13 @@ class IndexCache {
|
||||||
/**
|
/**
|
||||||
* Bring memory usage below totalMemoryAllowed.
|
* Bring memory usage below totalMemoryAllowed.
|
||||||
*/
|
*/
|
||||||
private synchronized void freeIndexInformation() {
|
private synchronized void freeIndexInformation() throws IOException {
|
||||||
while (totalMemoryUsed.get() > totalMemoryAllowed) {
|
while (totalMemoryUsed.get() > totalMemoryAllowed) {
|
||||||
String s = queue.remove();
|
if(queue.isEmpty()) {
|
||||||
IndexInformation info = cache.remove(s);
|
break;
|
||||||
if (info != null) {
|
|
||||||
totalMemoryUsed.addAndGet(-info.getSize());
|
|
||||||
}
|
}
|
||||||
|
String mapId = queue.remove();
|
||||||
|
removeMapInternal(mapId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.DataOutputStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.zip.CRC32;
|
import java.util.zip.CRC32;
|
||||||
import java.util.zip.CheckedOutputStream;
|
import java.util.zip.CheckedOutputStream;
|
||||||
|
|
||||||
|
@ -211,22 +212,31 @@ public class TestIndexCache extends TestCase {
|
||||||
UserGroupInformation.getCurrentUser().getShortUserName();
|
UserGroupInformation.getCurrentUser().getShortUserName();
|
||||||
writeFile(fs, big, bytesPerFile, partsPerMap);
|
writeFile(fs, big, bytesPerFile, partsPerMap);
|
||||||
|
|
||||||
|
// Capture if any runtime exception occurred
|
||||||
|
final AtomicBoolean failed = new AtomicBoolean();
|
||||||
|
|
||||||
// run multiple times
|
// run multiple times
|
||||||
for (int i = 0; i < 20; ++i) {
|
for (int i = 0; i < 20; ++i) {
|
||||||
Thread getInfoThread = new Thread() {
|
Thread getInfoThread = new Thread() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
try {
|
||||||
cache.getIndexInformation("bigIndex", partsPerMap, big, user);
|
cache.getIndexInformation("bigIndex", 0, big, user);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// should not be here
|
// should not be here
|
||||||
|
failed.set(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Thread removeMapThread = new Thread() {
|
Thread removeMapThread = new Thread() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
cache.removeMap("bigIndex");
|
try {
|
||||||
|
cache.removeMap("bigIndex");
|
||||||
|
} catch (Exception e) {
|
||||||
|
// should not be here
|
||||||
|
failed.set(true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if (i%2==0) {
|
if (i%2==0) {
|
||||||
|
@ -238,7 +248,8 @@ public class TestIndexCache extends TestCase {
|
||||||
}
|
}
|
||||||
getInfoThread.join();
|
getInfoThread.join();
|
||||||
removeMapThread.join();
|
removeMapThread.join();
|
||||||
assertEquals(true, cache.checkTotalMemoryUsed());
|
assertFalse("An unexpected exception", failed.get());
|
||||||
|
assertTrue(cache.checkTotalMemoryUsed());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -254,6 +265,9 @@ public class TestIndexCache extends TestCase {
|
||||||
UserGroupInformation.getCurrentUser().getShortUserName();
|
UserGroupInformation.getCurrentUser().getShortUserName();
|
||||||
writeFile(fs, racy, bytesPerFile, partsPerMap);
|
writeFile(fs, racy, bytesPerFile, partsPerMap);
|
||||||
|
|
||||||
|
// Capture if any runtime exception occurred
|
||||||
|
final AtomicBoolean failed = new AtomicBoolean();
|
||||||
|
|
||||||
// run multiple instances
|
// run multiple instances
|
||||||
Thread[] getInfoThreads = new Thread[50];
|
Thread[] getInfoThreads = new Thread[50];
|
||||||
for (int i = 0; i < 50; i++) {
|
for (int i = 0; i < 50; i++) {
|
||||||
|
@ -261,10 +275,15 @@ public class TestIndexCache extends TestCase {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
try {
|
||||||
cache.getIndexInformation("racyIndex", partsPerMap, racy, user);
|
while (!Thread.currentThread().isInterrupted()) {
|
||||||
cache.removeMap("racyIndex");
|
cache.getIndexInformation("racyIndex", 0, racy, user);
|
||||||
|
cache.removeMap("racyIndex");
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// should not be here
|
if (!Thread.currentThread().isInterrupted()) {
|
||||||
|
// should not be here
|
||||||
|
failed.set(true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -274,20 +293,12 @@ public class TestIndexCache extends TestCase {
|
||||||
getInfoThreads[i].start();
|
getInfoThreads[i].start();
|
||||||
}
|
}
|
||||||
|
|
||||||
final Thread mainTestThread = Thread.currentThread();
|
// The duration to keep the threads testing
|
||||||
|
Thread.sleep(5000);
|
||||||
Thread timeoutThread = new Thread() {
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
try {
|
|
||||||
Thread.sleep(15000);
|
|
||||||
mainTestThread.interrupt();
|
|
||||||
} catch (InterruptedException ie) {
|
|
||||||
// we are done;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
for (int i = 0; i < 50; i++) {
|
||||||
|
getInfoThreads[i].interrupt();
|
||||||
|
}
|
||||||
for (int i = 0; i < 50; i++) {
|
for (int i = 0; i < 50; i++) {
|
||||||
try {
|
try {
|
||||||
getInfoThreads[i].join();
|
getInfoThreads[i].join();
|
||||||
|
@ -296,10 +307,9 @@ public class TestIndexCache extends TestCase {
|
||||||
fail("Unexpectedly long delay during concurrent cache entry creations");
|
fail("Unexpectedly long delay during concurrent cache entry creations");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// stop the timeoutThread. If we get interrupted before stopping, there
|
assertFalse("An unexpected exception", failed.get());
|
||||||
// must be something wrong, although it wasn't a deadlock. No need to
|
assertTrue("Total memory used does not represent contents of the cache",
|
||||||
// catch and swallow.
|
cache.checkTotalMemoryUsed());
|
||||||
timeoutThread.interrupt();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void checkRecord(IndexRecord rec, long fill) {
|
private static void checkRecord(IndexRecord rec, long fill) {
|
||||||
|
|
Loading…
Reference in New Issue