HDFS-13901. INode access time is ignored because of race between open and rename. Contributed by Jinglun.

This commit is contained in:
Wei-Chiu Chuang 2019-10-21 17:31:43 -07:00
parent c4e27ef773
commit 72003b19bf
4 changed files with 101 additions and 28 deletions

View File

@ -186,7 +186,7 @@ class FSDirStatAndListingOp {
boolean updateAccessTime = fsd.isAccessTimeSupported() boolean updateAccessTime = fsd.isAccessTimeSupported()
&& !iip.isSnapshot() && !iip.isSnapshot()
&& now > inode.getAccessTime() + fsd.getAccessTimePrecision(); && now > inode.getAccessTime() + fsd.getAccessTimePrecision();
return new GetBlockLocationsResult(updateAccessTime, blocks); return new GetBlockLocationsResult(updateAccessTime, blocks, iip);
} finally { } finally {
fsd.readUnlock(); fsd.readUnlock();
} }
@ -599,13 +599,18 @@ class FSDirStatAndListingOp {
/**
 * Result holder built at the end of getBlockLocations: carries the
 * access-time decision, the located blocks and the resolved path.
 */
static class GetBlockLocationsResult { static class GetBlockLocationsResult {
// True when access time is supported, the path is not a snapshot path and
// the stored access time is older than the configured precision.
final boolean updateAccessTime; final boolean updateAccessTime;
final LocatedBlocks blocks; final LocatedBlocks blocks;
// The INodesInPath handed to the constructor by getBlockLocations.
private final INodesInPath iip;
boolean updateAccessTime() { boolean updateAccessTime() {
return updateAccessTime; return updateAccessTime;
} }
/** @return the INodesInPath stored at construction time. */
public INodesInPath getIIp() {
return iip;
}
// Private constructor: takes the three values and stores them unchanged.
private GetBlockLocationsResult(
boolean updateAccessTime, LocatedBlocks blocks) { boolean updateAccessTime, LocatedBlocks blocks, INodesInPath iip) {
this.updateAccessTime = updateAccessTime; this.updateAccessTime = updateAccessTime;
this.blocks = blocks; this.blocks = blocks;
this.iip = iip;
} }
} }
} }

View File

@ -1974,12 +1974,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
checkOperation(OperationCategory.READ); checkOperation(OperationCategory.READ);
GetBlockLocationsResult res = null; GetBlockLocationsResult res = null;
final FSPermissionChecker pc = getPermissionChecker(); final FSPermissionChecker pc = getPermissionChecker();
final INode inode;
try { try {
readLock(); readLock();
try { try {
checkOperation(OperationCategory.READ); checkOperation(OperationCategory.READ);
res = FSDirStatAndListingOp.getBlockLocations( res = FSDirStatAndListingOp.getBlockLocations(
dir, pc, srcArg, offset, length, true); dir, pc, srcArg, offset, length, true);
inode = res.getIIp().getLastINode();
if (isInSafeMode()) { if (isInSafeMode()) {
for (LocatedBlock b : res.blocks.getLocatedBlocks()) { for (LocatedBlock b : res.blocks.getLocatedBlocks()) {
// if safemode & no block locations yet then throw safemodeException // if safemode & no block locations yet then throw safemodeException
@ -2022,36 +2024,18 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
final long now = now(); final long now = now();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
/** boolean updateAccessTime =
* Resolve the path again and update the atime only when the file
* exists.
*
* XXX: Races can still occur even after resolving the path again.
* For example:
*
* <ul>
* <li>Get the block location for "/a/b"</li>
* <li>Rename "/a/b" to "/c/b"</li>
* <li>The second resolution still points to "/a/b", which is
* wrong.</li>
* </ul>
*
* The behavior is incorrect but consistent with the one before
* HDFS-7463. A better fix is to change the edit log of SetTime to
* use inode id instead of a path.
*/
final INodesInPath iip = dir.resolvePath(pc, srcArg, DirOp.READ);
src = iip.getPath();
INode inode = iip.getLastINode();
boolean updateAccessTime = inode != null &&
now > inode.getAccessTime() + dir.getAccessTimePrecision(); now > inode.getAccessTime() + dir.getAccessTimePrecision();
if (!isInSafeMode() && updateAccessTime) { if (!isInSafeMode() && updateAccessTime) {
if (!inode.isDeleted()) {
src = inode.getFullPathName();
final INodesInPath iip = dir.resolvePath(pc, src, DirOp.READ);
boolean changed = FSDirAttrOp.setTimes(dir, iip, -1, now, false); boolean changed = FSDirAttrOp.setTimes(dir, iip, -1, now, false);
if (changed) { if (changed) {
getEditLog().logTimes(src, -1, now); getEditLog().logTimes(src, -1, now);
} }
} }
}
} finally { } finally {
writeUnlock(operationName); writeUnlock(operationName);
} }

View File

@ -589,6 +589,18 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
return DFSUtil.bytes2String(path); return DFSUtil.bytes2String(path);
} }
/**
 * Reports whether this inode is no longer attached to the root.
 * Starting at this inode, the parent links are followed upwards until
 * either an inode for which {@code isRoot()} holds is found or the chain
 * ends with a {@code null} parent.
 *
 * @return {@code false} if the root is reached (this inode included),
 *         {@code true} if the parent chain ends before reaching it.
 */
public boolean isDeleted() {
  for (INode cur = this; cur != null; cur = cur.getParent()) {
    if (cur.isRoot()) {
      // Found the root on the way up, so the inode is still linked in.
      return false;
    }
  }
  // Ran off the top of the parent chain without meeting the root.
  return true;
}
public byte[][] getPathComponents() { public byte[][] getPathComponents() {
int n = 0; int n = 0;
for (INode inode = this; inode != null; inode = inode.getParent()) { for (INode inode = this; inode != null; inode = inode.getParent()) {

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode; package org.apache.hadoop.hdfs.server.namenode;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.AbstractMap; import java.util.AbstractMap;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
@ -27,10 +28,13 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.Semaphore;
import org.apache.hadoop.fs.Options;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hdfs.AddBlockFlag; import org.apache.hadoop.hdfs.AddBlockFlag;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -65,6 +69,7 @@ import org.mockito.Mockito;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
import static org.junit.Assert.assertNotEquals;
/** /**
* Test race between delete and other operations. For now only addBlock() * Test race between delete and other operations. For now only addBlock()
@ -441,4 +446,71 @@ public class TestDeleteRace {
} }
} }
} }
/**
 * Races an open (getBlockLocations) against a rename of the same file and
 * checks that the access time is still updated on the renamed file.
 * The NameNode is restarted at the end; presumably this confirms that the
 * logged access-time update can be replayed -- TODO confirm.
 *
 * @throws Exception if cluster setup, the file operations or the final
 *         status lookup fail.
 */
@Test(timeout = 20000)
public void testOpenRenameRace() throws Exception {
  Configuration config = new Configuration();
  // Precision of 1 so that practically any later open qualifies for an
  // access-time update.
  config.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
  MiniDFSCluster dfsCluster = null;
  final String src = "/dir/src-file";
  final String dst = "/dir/dst-file";
  final DistributedFileSystem hdfs;
  try {
    dfsCluster = new MiniDFSCluster.Builder(config).build();
    dfsCluster.waitActive();
    final FSNamesystem fsn = dfsCluster.getNamesystem();
    hdfs = dfsCluster.getFileSystem();
    DFSTestUtil.createFile(hdfs, new Path(src), 5, (short) 1, 0xFEED);
    FileStatus status = hdfs.getFileStatus(new Path(src));
    long accessTime = status.getAccessTime();

    Semaphore openSem = new Semaphore(0);
    Semaphore renameSem = new Semaphore(0);
    // 1. Hold the write lock.
    // 2. Start the open thread.
    // 3. openSem plus yield lets the open thread reach the read lock and
    //    wait there.
    // 4. Start the rename thread.
    // 5. renameSem plus yield lets the rename thread reach the write lock
    //    and wait there.
    // 6. Release the write lock; it is a fair lock, so the open thread gets
    //    the read lock first.
    // 7. The open thread unlocks; rename gets the write lock and renames.
    // 8. The rename thread unlocks; the open thread gets the write lock and
    //    updates the access time.
    Thread open = new Thread(() -> {
      try {
        openSem.release();
        fsn.getBlockLocations("foo", src, 0, 5);
      } catch (IOException ignored) {
        // Best effort: a failed open leaves the access time untouched,
        // which the assertNotEquals below then reports.
      }
    });
    Thread rename = new Thread(() -> {
      try {
        openSem.acquire();
        renameSem.release();
        fsn.renameTo(src, dst, false, Options.Rename.NONE);
      } catch (IOException ignored) {
        // Best effort: if the rename failed, the status lookup on dst
        // below is expected to fail the test.
      } catch (InterruptedException e) {
        // Keep the interrupt visible instead of silently dropping it.
        Thread.currentThread().interrupt();
      }
    });

    fsn.writeLock();
    try {
      open.start();
      openSem.acquire();
      Thread.yield();
      openSem.release();
      rename.start();
      renameSem.acquire();
      Thread.yield();
    } finally {
      // Always release, even if an acquire above is interrupted; otherwise
      // the namesystem write lock would stay held by this thread.
      fsn.writeUnlock();
    }

    // Wait for the open and rename threads to finish.
    open.join();
    rename.join();

    status = hdfs.getFileStatus(new Path(dst));
    assertNotEquals(accessTime, status.getAccessTime());
    dfsCluster.restartNameNode(0);
  } finally {
    if (dfsCluster != null) {
      dfsCluster.shutdown();
    }
  }
}
} }