HDFS-13901. INode access time is ignored because of race between open and rename. Contributed by Jinglun.

This commit is contained in:
Wei-Chiu Chuang 2019-10-22 09:34:11 -07:00
parent 8c74717720
commit 31243f0d29
4 changed files with 101 additions and 28 deletions

View File

@ -186,7 +186,7 @@ class FSDirStatAndListingOp {
boolean updateAccessTime = fsd.isAccessTimeSupported()
&& !iip.isSnapshot()
&& now > inode.getAccessTime() + fsd.getAccessTimePrecision();
return new GetBlockLocationsResult(updateAccessTime, blocks);
return new GetBlockLocationsResult(updateAccessTime, blocks, iip);
} finally {
fsd.readUnlock();
}
@ -599,13 +599,18 @@ class FSDirStatAndListingOp {
static class GetBlockLocationsResult {
final boolean updateAccessTime;
final LocatedBlocks blocks;
private final INodesInPath iip;
boolean updateAccessTime() {
return updateAccessTime;
}
public INodesInPath getIIp() {
return iip;
}
private GetBlockLocationsResult(
boolean updateAccessTime, LocatedBlocks blocks) {
boolean updateAccessTime, LocatedBlocks blocks, INodesInPath iip) {
this.updateAccessTime = updateAccessTime;
this.blocks = blocks;
this.iip = iip;
}
}
}

View File

@ -1958,11 +1958,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
checkOperation(OperationCategory.READ);
GetBlockLocationsResult res = null;
final FSPermissionChecker pc = getPermissionChecker();
final INode inode;
readLock();
try {
checkOperation(OperationCategory.READ);
res = FSDirStatAndListingOp.getBlockLocations(
dir, pc, srcArg, offset, length, true);
inode = res.getIIp().getLastINode();
if (isInSafeMode()) {
for (LocatedBlock b : res.blocks.getLocatedBlocks()) {
// if safemode & no block locations yet then throw safemodeException
@ -2003,34 +2005,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
final long now = now();
try {
checkOperation(OperationCategory.WRITE);
/**
* Resolve the path again and update the atime only when the file
* exists.
*
* XXX: Races can still occur even after resolving the path again.
* For example:
*
* <ul>
* <li>Get the block location for "/a/b"</li>
* <li>Rename "/a/b" to "/c/b"</li>
* <li>The second resolution still points to "/a/b", which is
* wrong.</li>
* </ul>
*
* The behavior is incorrect but consistent with the one before
* HDFS-7463. A better fix is to change the edit log of SetTime to
* use inode id instead of a path.
*/
final INodesInPath iip = dir.resolvePath(pc, srcArg, DirOp.READ);
src = iip.getPath();
INode inode = iip.getLastINode();
boolean updateAccessTime = inode != null &&
boolean updateAccessTime =
now > inode.getAccessTime() + dir.getAccessTimePrecision();
if (!isInSafeMode() && updateAccessTime) {
boolean changed = FSDirAttrOp.setTimes(dir, iip, -1, now, false);
if (changed) {
getEditLog().logTimes(src, -1, now);
if (!inode.isDeleted()) {
src = inode.getFullPathName();
final INodesInPath iip = dir.resolvePath(pc, src, DirOp.READ);
boolean changed = FSDirAttrOp.setTimes(dir, iip, -1, now, false);
if (changed) {
getEditLog().logTimes(src, -1, now);
}
}
}
} catch (Throwable e) {

View File

@ -589,6 +589,18 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
return DFSUtil.bytes2String(path);
}
public boolean isDeleted() {
INode pInode = this;
while (pInode != null && !pInode.isRoot()) {
pInode = pInode.getParent();
}
if (pInode == null) {
return true;
} else {
return !pInode.isRoot();
}
}
public byte[][] getPathComponents() {
int n = 0;
for (INode inode = this; inode != null; inode = inode.getParent()) {

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Comparator;
@ -27,10 +28,13 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Semaphore;
import org.apache.hadoop.fs.Options;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hdfs.AddBlockFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@ -66,6 +70,7 @@ import org.mockito.Mockito;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
import static org.junit.Assert.assertNotEquals;
/**
* Test race between delete and other operations. For now only addBlock()
@ -442,4 +447,71 @@ public class TestDeleteRace {
}
}
}
@Test(timeout = 20000)
public void testOpenRenameRace() throws Exception {
Configuration config = new Configuration();
config.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
MiniDFSCluster dfsCluster = null;
final String src = "/dir/src-file";
final String dst = "/dir/dst-file";
final DistributedFileSystem hdfs;
try {
dfsCluster = new MiniDFSCluster.Builder(config).build();
dfsCluster.waitActive();
final FSNamesystem fsn = dfsCluster.getNamesystem();
hdfs = dfsCluster.getFileSystem();
DFSTestUtil.createFile(hdfs, new Path(src), 5, (short) 1, 0xFEED);
FileStatus status = hdfs.getFileStatus(new Path(src));
long accessTime = status.getAccessTime();
Semaphore openSem = new Semaphore(0);
Semaphore renameSem = new Semaphore(0);
// 1.hold writeLock.
// 2.start open thread.
// 3.openSem & yield makes sure open thread wait on readLock.
// 4.start rename thread.
// 5.renameSem & yield makes sure rename thread wait on writeLock.
// 6.release writeLock, it's fair lock so open thread gets read lock.
// 7.open thread unlocks, rename gets write lock and does rename.
// 8.rename thread unlocks, open thread gets write lock and update time.
Thread open = new Thread(() -> {
try {
openSem.release();
fsn.getBlockLocations("foo", src, 0, 5);
} catch (IOException e) {
}
});
Thread rename = new Thread(() -> {
try {
openSem.acquire();
renameSem.release();
fsn.renameTo(src, dst, false, Options.Rename.NONE);
} catch (IOException e) {
} catch (InterruptedException e) {
}
});
fsn.writeLock();
open.start();
openSem.acquire();
Thread.yield();
openSem.release();
rename.start();
renameSem.acquire();
Thread.yield();
fsn.writeUnlock();
// wait open and rename threads finish.
open.join();
rename.join();
status = hdfs.getFileStatus(new Path(dst));
assertNotEquals(accessTime, status.getAccessTime());
dfsCluster.restartNameNode(0);
} finally {
if (dfsCluster != null) {
dfsCluster.shutdown();
}
}
}
}