MAPREDUCE-2409. DistributedCache maps files and archives to the same path,
despite semantic incompatibility. Contributed by Siddharth Seth git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1149004 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
08928d067b
commit
3810266eae
|
@ -344,6 +344,9 @@ Trunk (unreleased changes)
|
|||
MAPREDUCE-2710. Update JobSubmitter.printTokens(..) for HDFS-2161.
|
||||
(szetszwo)
|
||||
|
||||
MAPREDUCE-2409. DistributedCache maps files and archives to the same path,
|
||||
despite semantic incompatibility. (Siddharth Seth via cdouglas)
|
||||
|
||||
Release 0.22.0 - Unreleased
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -241,7 +241,7 @@ public class TaskDistributedCacheManager {
|
|||
for (CacheFile c : cacheFiles) {
|
||||
if (c.getLocalized()) {
|
||||
distributedCacheManager.releaseCache(c.uri, taskConf, c.timestamp,
|
||||
c.owner);
|
||||
c.owner, CacheFile.FileType.ARCHIVE == c.type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -158,9 +158,9 @@ public class TrackerDistributedCacheManager {
|
|||
Path currentWorkDir, boolean honorSymLinkConf, boolean isPublic)
|
||||
throws IOException {
|
||||
String key;
|
||||
key = getKey(cache, conf, confFileStamp, getLocalizedCacheOwner(isPublic));
|
||||
key = getKey(cache, conf, confFileStamp, getLocalizedCacheOwner(isPublic),
|
||||
isArchive);
|
||||
CacheStatus lcacheStatus;
|
||||
Path localizedPath = null;
|
||||
synchronized (cachedArchives) {
|
||||
lcacheStatus = cachedArchives.get(key);
|
||||
if (lcacheStatus == null) {
|
||||
|
@ -187,18 +187,18 @@ public class TrackerDistributedCacheManager {
|
|||
FileSystem fs = FileSystem.get(cache, conf);
|
||||
checkStampSinceJobStarted(conf, fs, cache, confFileStamp,
|
||||
lcacheStatus, fileStatus);
|
||||
localizedPath = localizeCache(conf, cache, confFileStamp,
|
||||
localizeCache(conf, cache, confFileStamp,
|
||||
lcacheStatus, isArchive, isPublic);
|
||||
lcacheStatus.initComplete();
|
||||
} else {
|
||||
localizedPath = checkCacheStatusValidity(conf, cache, confFileStamp,
|
||||
checkCacheStatusValidity(conf, cache, confFileStamp,
|
||||
lcacheStatus, fileStatus, isArchive);
|
||||
}
|
||||
createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir,
|
||||
honorSymLinkConf);
|
||||
}
|
||||
initSuccessful = true;
|
||||
return localizedPath;
|
||||
return lcacheStatus.localizedLoadPath;
|
||||
} finally {
|
||||
if (!initSuccessful) {
|
||||
lcacheStatus.decRefCount();
|
||||
|
@ -217,8 +217,8 @@ public class TrackerDistributedCacheManager {
|
|||
* @throws IOException
|
||||
*/
|
||||
void releaseCache(URI cache, Configuration conf, long timeStamp,
|
||||
String owner) throws IOException {
|
||||
String key = getKey(cache, conf, timeStamp, owner);
|
||||
String owner, boolean isArchive) throws IOException {
|
||||
String key = getKey(cache, conf, timeStamp, owner, isArchive);
|
||||
synchronized (cachedArchives) {
|
||||
CacheStatus lcacheStatus = cachedArchives.get(key);
|
||||
if (lcacheStatus == null) {
|
||||
|
@ -236,8 +236,8 @@ public class TrackerDistributedCacheManager {
|
|||
* This method is called from unit tests.
|
||||
*/
|
||||
int getReferenceCount(URI cache, Configuration conf, long timeStamp,
|
||||
String owner) throws IOException {
|
||||
String key = getKey(cache, conf, timeStamp, owner);
|
||||
String owner, boolean isArchive) throws IOException {
|
||||
String key = getKey(cache, conf, timeStamp, owner, isArchive);
|
||||
synchronized (cachedArchives) {
|
||||
CacheStatus lcacheStatus = cachedArchives.get(key);
|
||||
if (lcacheStatus == null) {
|
||||
|
@ -315,9 +315,10 @@ public class TrackerDistributedCacheManager {
|
|||
return path;
|
||||
}
|
||||
|
||||
String getKey(URI cache, Configuration conf, long timeStamp, String user)
|
||||
throws IOException {
|
||||
return makeRelative(cache, conf) + String.valueOf(timeStamp) + user;
|
||||
String getKey(URI cache, Configuration conf, long timeStamp, String user,
|
||||
boolean isArchive) throws IOException {
|
||||
return (isArchive ? "a" : "f") + "^" + makeRelative(cache, conf)
|
||||
+ String.valueOf(timeStamp) + user;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -341,12 +342,11 @@ public class TrackerDistributedCacheManager {
|
|||
* @return mtime of a given cache file on hdfs
|
||||
* @throws IOException
|
||||
*/
|
||||
static long getTimestamp(Configuration conf, URI cache)
|
||||
throws IOException {
|
||||
long getTimestamp(Configuration conf, URI cache) throws IOException {
|
||||
return getFileStatus(conf, cache).getModificationTime();
|
||||
}
|
||||
|
||||
private Path checkCacheStatusValidity(Configuration conf,
|
||||
void checkCacheStatusValidity(Configuration conf,
|
||||
URI cache, long confFileStamp,
|
||||
CacheStatus cacheStatus,
|
||||
FileStatus fileStatus,
|
||||
|
@ -362,7 +362,6 @@ public class TrackerDistributedCacheManager {
|
|||
|
||||
LOG.info(String.format("Using existing cache of %s->%s",
|
||||
cache.toString(), cacheStatus.localizedLoadPath));
|
||||
return cacheStatus.localizedLoadPath;
|
||||
}
|
||||
|
||||
private void createSymlink(Configuration conf, URI cache,
|
||||
|
@ -472,7 +471,7 @@ public class TrackerDistributedCacheManager {
|
|||
}
|
||||
|
||||
// ensure that the file on hdfs hasn't been modified since the job started
|
||||
private long checkStampSinceJobStarted(Configuration conf, FileSystem fs,
|
||||
long checkStampSinceJobStarted(Configuration conf, FileSystem fs,
|
||||
URI cache, long confFileStamp,
|
||||
CacheStatus lcacheStatus,
|
||||
FileStatus fileStatus)
|
||||
|
|
|
@ -51,11 +51,15 @@ import org.apache.hadoop.fs.permission.FsAction;
|
|||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.mapreduce.filecache.TaskDistributedCacheManager;
|
||||
import org.apache.hadoop.mapreduce.filecache.TrackerDistributedCacheManager;
|
||||
import org.apache.hadoop.mapreduce.filecache.TrackerDistributedCacheManager.CacheStatus;
|
||||
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
import org.mockito.Matchers;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
public class TestTrackerDistributedCacheManager extends TestCase {
|
||||
|
||||
protected String TEST_ROOT_DIR =
|
||||
|
@ -251,7 +255,7 @@ public class TestTrackerDistributedCacheManager extends TestCase {
|
|||
handle.release();
|
||||
for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
|
||||
assertEquals(0, manager.getReferenceCount(c.uri, conf1, c.timestamp,
|
||||
c.owner));
|
||||
c.owner, false));
|
||||
}
|
||||
|
||||
Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
|
||||
|
@ -289,7 +293,7 @@ public class TestTrackerDistributedCacheManager extends TestCase {
|
|||
for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
|
||||
try {
|
||||
assertEquals(0, manager.getReferenceCount(c.uri, conf2, c.timestamp,
|
||||
c.owner));
|
||||
c.owner, false));
|
||||
} catch (IOException ie) {
|
||||
th = ie;
|
||||
Log.info("Exception getting reference count for " + c.uri, ie);
|
||||
|
@ -609,15 +613,15 @@ public class TestTrackerDistributedCacheManager extends TestCase {
|
|||
|
||||
manager.releaseCache(thirdCacheFile.toUri(), conf2,
|
||||
getFileStamp(thirdCacheFile),
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false));
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);
|
||||
|
||||
manager.releaseCache(secondCacheFile.toUri(), conf2,
|
||||
getFileStamp(secondCacheFile),
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false));
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);
|
||||
|
||||
manager.releaseCache(firstCacheFile.toUri(), conf2,
|
||||
getFileStamp(firstCacheFile),
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false));
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);
|
||||
|
||||
|
||||
// Getting the fourth cache will make the number of sub directories becomes
|
||||
|
@ -645,6 +649,47 @@ public class TestTrackerDistributedCacheManager extends TestCase {
|
|||
manager.stopCleanupThread();
|
||||
}
|
||||
|
||||
public void testSameNameFileArchiveCache() throws IOException,
|
||||
URISyntaxException, InterruptedException {
|
||||
if (!canRun()) {
|
||||
return;
|
||||
}
|
||||
TrackerDistributedCacheManager manager =
|
||||
spy(new TrackerDistributedCacheManager(conf, taskController));
|
||||
URI rsrc = new URI("file://foo/bar/yak");
|
||||
Path cacheDir = new Path("file:///localcache");
|
||||
Path archivePath = new Path(cacheDir, "archive");
|
||||
Path filePath = new Path(cacheDir, "file");
|
||||
doReturn(archivePath).when(manager).localizeCache(eq(conf), eq(rsrc),
|
||||
anyLong(), Matchers.<CacheStatus> anyObject(), eq(true), anyBoolean());
|
||||
doReturn(filePath).when(manager).localizeCache(eq(conf), eq(rsrc),
|
||||
anyLong(), Matchers.<CacheStatus> anyObject(), eq(false), anyBoolean());
|
||||
// could fail, but check match instead
|
||||
doNothing().when(manager).checkCacheStatusValidity(
|
||||
Matchers.<Configuration> anyObject(), eq(rsrc), anyLong(),
|
||||
Matchers.<CacheStatus> anyObject(), Matchers.<FileStatus> anyObject(),
|
||||
anyBoolean());
|
||||
// localizeCache initializes mtime of cached rsrc; set to uninitialized val
|
||||
doReturn(-1L).when(manager).checkStampSinceJobStarted(
|
||||
Matchers.<Configuration> anyObject(),
|
||||
Matchers.<FileSystem> anyObject(), eq(rsrc), anyLong(),
|
||||
Matchers.<CacheStatus> anyObject(), Matchers.<FileStatus> anyObject());
|
||||
doReturn(-1L).when(manager).getTimestamp(
|
||||
Matchers.<Configuration> anyObject(), eq(rsrc));
|
||||
FileStatus rsrcStatus = mock(FileStatus.class);
|
||||
when(rsrcStatus.getLen()).thenReturn(4344L);
|
||||
|
||||
Path localizedPathForFile =
|
||||
manager.getLocalCache(rsrc, conf, "sub", rsrcStatus, false, 20L,
|
||||
new Path("file:///tmp"), false, true);
|
||||
Path localizedPathForArchive =
|
||||
manager.getLocalCache(rsrc, conf, "sub", rsrcStatus, true, 20L,
|
||||
new Path("file:///tmp"), false, true);
|
||||
assertNotSame("File and Archive resolve to the same path: "
|
||||
+ localizedPathForFile + ". Should differ.", localizedPathForFile,
|
||||
localizedPathForArchive);
|
||||
}
|
||||
|
||||
/** test delete cache */
|
||||
public void testDeleteCache() throws Exception {
|
||||
if (!canRun()) {
|
||||
|
@ -676,7 +721,7 @@ public class TestTrackerDistributedCacheManager extends TestCase {
|
|||
getFileStamp(firstCacheFile), new Path(TEST_ROOT_DIR), false, false);
|
||||
manager.releaseCache(firstCacheFile.toUri(), conf2,
|
||||
getFileStamp(firstCacheFile),
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false));
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);
|
||||
//in above code,localized a file of size 4K and then release the cache
|
||||
// which will cause the cache be deleted when the limit goes out.
|
||||
// The below code localize another cache which's designed to
|
||||
|
@ -702,7 +747,7 @@ public class TestTrackerDistributedCacheManager extends TestCase {
|
|||
// Release the third cache so that it can be deleted while sweeping
|
||||
manager.releaseCache(thirdCacheFile.toUri(), conf2,
|
||||
getFileStamp(thirdCacheFile),
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false));
|
||||
TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);
|
||||
// Getting the fourth cache will make the number of sub directories becomes
|
||||
// 3 which is greater than 2. So the released cache will be deleted.
|
||||
manager.getLocalCache(fourthCacheFile.toUri(), conf2,
|
||||
|
|
Loading…
Reference in New Issue