HDFS-10768. Optimize mkdir ops. Contributed by Daryn Sharp.

This commit is contained in:
Kihwal Lee 2016-08-26 15:39:21 -05:00
parent cde3a00526
commit 8b7adf4ddf
5 changed files with 68 additions and 94 deletions

View File

@ -32,10 +32,7 @@ import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import java.io.IOException; import java.io.IOException;
import java.util.AbstractMap;
import java.util.List; import java.util.List;
import java.util.Map;
import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.util.Time.now;
class FSDirMkdirOp { class FSDirMkdirOp {
@ -63,7 +60,6 @@ class FSDirMkdirOp {
throw new FileAlreadyExistsException("Path is not a directory: " + src); throw new FileAlreadyExistsException("Path is not a directory: " + src);
} }
INodesInPath existing = lastINode != null ? iip : iip.getExistingINodes();
if (lastINode == null) { if (lastINode == null) {
if (fsd.isPermissionEnabled()) { if (fsd.isPermissionEnabled()) {
fsd.checkAncestorAccess(pc, iip, FsAction.WRITE); fsd.checkAncestorAccess(pc, iip, FsAction.WRITE);
@ -78,26 +74,20 @@ class FSDirMkdirOp {
// create multiple inodes. // create multiple inodes.
fsn.checkFsObjectLimit(); fsn.checkFsObjectLimit();
List<String> nonExisting = iip.getPath(existing.length(),
iip.length() - existing.length());
int length = nonExisting.size();
if (length > 1) {
List<String> ancestors = nonExisting.subList(0, length - 1);
// Ensure that the user can traversal the path by adding implicit // Ensure that the user can traversal the path by adding implicit
// u+wx permission to all ancestor directories // u+wx permission to all ancestor directories.
existing = createChildrenDirectories(fsd, existing, ancestors, INodesInPath existing =
addImplicitUwx(permissions, permissions)); createParentDirectories(fsd, iip, permissions, false);
if (existing != null) {
existing = createSingleDirectory(
fsd, existing, iip.getLastLocalName(), permissions);
}
if (existing == null) { if (existing == null) {
throw new IOException("Failed to create directory: " + src); throw new IOException("Failed to create directory: " + src);
} }
iip = existing;
} }
return fsd.getAuditFileInfo(iip);
if ((existing = createChildrenDirectories(fsd, existing,
nonExisting.subList(length - 1, length), permissions)) == null) {
throw new IOException("Failed to create directory: " + src);
}
}
return fsd.getAuditFileInfo(existing);
} finally { } finally {
fsd.writeUnlock(); fsd.writeUnlock();
} }
@ -112,35 +102,18 @@ class FSDirMkdirOp {
* For example, path="/foo/bar/spam", "/foo" is an existing directory, * For example, path="/foo/bar/spam", "/foo" is an existing directory,
* "/foo/bar" is not existing yet, the function will create directory bar. * "/foo/bar" is not existing yet, the function will create directory bar.
* *
* @return a tuple which contains both the new INodesInPath (with all the * @return a INodesInPath with all the existing and newly created
* existing and newly created directories) and the last component in the * ancestor directories created.
* relative path. Or return null if there are errors. * Or return null if there are errors.
*/ */
static Map.Entry<INodesInPath, String> createAncestorDirectories( static INodesInPath createAncestorDirectories(
FSDirectory fsd, INodesInPath iip, PermissionStatus permission) FSDirectory fsd, INodesInPath iip, PermissionStatus permission)
throws IOException { throws IOException {
final String last = DFSUtil.bytes2String(iip.getLastLocalName()); return createParentDirectories(fsd, iip, permission, true);
INodesInPath existing = iip.getExistingINodes();
List<String> children = iip.getPath(existing.length(),
iip.length() - existing.length());
int size = children.size();
if (size > 1) { // otherwise all ancestors have been created
List<String> directories = children.subList(0, size - 1);
INode parentINode = existing.getLastINode();
// Ensure that the user can traversal the path by adding implicit
// u+wx permission to all ancestor directories
existing = createChildrenDirectories(fsd, existing, directories,
addImplicitUwx(parentINode.getPermissionStatus(), permission));
if (existing == null) {
return null;
}
}
return new AbstractMap.SimpleImmutableEntry<>(existing, last);
} }
/** /**
* Create the directory {@code parent} / {@code children} and all ancestors * Create all ancestor directories and return the parent inodes.
* along the path.
* *
* @param fsd FSDirectory * @param fsd FSDirectory
* @param existing The INodesInPath instance containing all the existing * @param existing The INodesInPath instance containing all the existing
@ -149,21 +122,35 @@ class FSDirMkdirOp {
* starting with "/" * starting with "/"
* @param perm the permission of the directory. Note that all ancestors * @param perm the permission of the directory. Note that all ancestors
* created along the path has implicit {@code u+wx} permissions. * created along the path has implicit {@code u+wx} permissions.
* @param inheritPerms if the ancestor directories should inherit permissions
* or use the specified permissions.
* *
* @return {@link INodesInPath} which contains all inodes to the * @return {@link INodesInPath} which contains all inodes to the
* target directory, After the execution parentPath points to the path of * target directory, After the execution parentPath points to the path of
* the returned INodesInPath. The function return null if the operation has * the returned INodesInPath. The function return null if the operation has
* failed. * failed.
*/ */
private static INodesInPath createChildrenDirectories(FSDirectory fsd, private static INodesInPath createParentDirectories(FSDirectory fsd,
INodesInPath existing, List<String> children, PermissionStatus perm) INodesInPath iip, PermissionStatus perm, boolean inheritPerms)
throws IOException { throws IOException {
assert fsd.hasWriteLock(); assert fsd.hasWriteLock();
// this is the desired parent iip if the subsequent delta is 1.
for (String component : children) { INodesInPath existing = iip.getExistingINodes();
int missing = iip.length() - existing.length();
if (missing == 0) { // full path exists, return parents.
existing = iip.getParentINodesInPath();
} else if (missing > 1) { // need to create at least one ancestor dir.
// Ensure that the user can traversal the path by adding implicit
// u+wx permission to all ancestor directories.
PermissionStatus basePerm = inheritPerms
? existing.getLastINode().getPermissionStatus()
: perm;
perm = addImplicitUwx(basePerm, perm);
// create all the missing directories.
final int last = iip.length() - 2;
for (int i = existing.length(); existing != null && i <= last; i++) {
byte[] component = iip.getPathComponent(i);
existing = createSingleDirectory(fsd, existing, component, perm); existing = createSingleDirectory(fsd, existing, component, perm);
if (existing == null) {
return null;
} }
} }
return existing; return existing;
@ -183,11 +170,11 @@ class FSDirMkdirOp {
} }
private static INodesInPath createSingleDirectory(FSDirectory fsd, private static INodesInPath createSingleDirectory(FSDirectory fsd,
INodesInPath existing, String localName, PermissionStatus perm) INodesInPath existing, byte[] localName, PermissionStatus perm)
throws IOException { throws IOException {
assert fsd.hasWriteLock(); assert fsd.hasWriteLock();
existing = unprotectedMkdir(fsd, fsd.allocateNewInodeId(), existing, existing = unprotectedMkdir(fsd, fsd.allocateNewInodeId(), existing,
DFSUtil.string2Bytes(localName), perm, null, now()); localName, perm, null, now());
if (existing == null) { if (existing == null) {
return null; return null;
} }

View File

@ -27,7 +27,6 @@ import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import java.io.IOException; import java.io.IOException;
import java.util.Map;
import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.util.Time.now;
@ -99,21 +98,21 @@ class FSDirSymlinkOp {
INodesInPath iip, String target, PermissionStatus dirPerms, INodesInPath iip, String target, PermissionStatus dirPerms,
boolean createParent, boolean logRetryCache) throws IOException { boolean createParent, boolean logRetryCache) throws IOException {
final long mtime = now(); final long mtime = now();
final byte[] localName = iip.getLastLocalName(); final INodesInPath parent;
if (createParent) { if (createParent) {
Map.Entry<INodesInPath, String> e = FSDirMkdirOp parent = FSDirMkdirOp.createAncestorDirectories(fsd, iip, dirPerms);
.createAncestorDirectories(fsd, iip, dirPerms); if (parent == null) {
if (e == null) {
return null; return null;
} }
iip = INodesInPath.append(e.getKey(), null, localName); } else {
parent = iip.getParentINodesInPath();
} }
final String userName = dirPerms.getUserName(); final String userName = dirPerms.getUserName();
long id = fsd.allocateNewInodeId(); long id = fsd.allocateNewInodeId();
PermissionStatus perm = new PermissionStatus( PermissionStatus perm = new PermissionStatus(
userName, null, FsPermission.getDefault()); userName, null, FsPermission.getDefault());
INodeSymlink newNode = unprotectedAddSymlink(fsd, iip.getExistingINodes(), INodeSymlink newNode = unprotectedAddSymlink(fsd, parent,
localName, id, target, mtime, mtime, perm); iip.getLastLocalName(), id, target, mtime, mtime, perm);
if (newNode == null) { if (newNode == null) {
NameNode.stateChangeLog.info("addSymlink: failed to add " + path); NameNode.stateChangeLog.info("addSymlink: failed to add " + path);
return null; return null;

View File

@ -65,7 +65,6 @@ import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID; import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID;
@ -419,10 +418,10 @@ class FSDirWriteFileOp {
} }
fsn.checkFsObjectLimit(); fsn.checkFsObjectLimit();
INodeFile newNode = null; INodeFile newNode = null;
Map.Entry<INodesInPath, String> parent = FSDirMkdirOp INodesInPath parent =
.createAncestorDirectories(fsd, iip, permissions); FSDirMkdirOp.createAncestorDirectories(fsd, iip, permissions);
if (parent != null) { if (parent != null) {
iip = addFile(fsd, parent.getKey(), parent.getValue(), permissions, iip = addFile(fsd, parent, iip.getLastLocalName(), permissions,
replication, blockSize, holder, clientMachine); replication, blockSize, holder, clientMachine);
newNode = iip != null ? iip.getLastINode().asFile() : null; newNode = iip != null ? iip.getLastINode().asFile() : null;
} }
@ -572,7 +571,7 @@ class FSDirWriteFileOp {
* @return the new INodesInPath instance that contains the new INode * @return the new INodesInPath instance that contains the new INode
*/ */
private static INodesInPath addFile( private static INodesInPath addFile(
FSDirectory fsd, INodesInPath existing, String localName, FSDirectory fsd, INodesInPath existing, byte[] localName,
PermissionStatus permissions, short replication, long preferredBlockSize, PermissionStatus permissions, short replication, long preferredBlockSize,
String clientName, String clientMachine) String clientName, String clientMachine)
throws IOException { throws IOException {
@ -589,7 +588,7 @@ class FSDirWriteFileOp {
} }
INodeFile newNode = newINodeFile(fsd.allocateNewInodeId(), permissions, INodeFile newNode = newINodeFile(fsd.allocateNewInodeId(), permissions,
modTime, modTime, replication, preferredBlockSize, ecPolicy != null); modTime, modTime, replication, preferredBlockSize, ecPolicy != null);
newNode.setLocalName(DFSUtil.string2Bytes(localName)); newNode.setLocalName(localName);
newNode.toUnderConstruction(clientName, clientMachine); newNode.toUnderConstruction(clientName, clientMachine);
newiip = fsd.addINode(existing, newNode); newiip = fsd.addINode(existing, newNode);
} finally { } finally {
@ -597,12 +596,13 @@ class FSDirWriteFileOp {
} }
if (newiip == null) { if (newiip == null) {
NameNode.stateChangeLog.info("DIR* addFile: failed to add " + NameNode.stateChangeLog.info("DIR* addFile: failed to add " +
existing.getPath() + "/" + localName); existing.getPath() + "/" + DFSUtil.bytes2String(localName));
return null; return null;
} }
if(NameNode.stateChangeLog.isDebugEnabled()) { if(NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* addFile: " + localName + " is added"); NameNode.stateChangeLog.debug("DIR* addFile: " +
DFSUtil.bytes2String(localName) + " is added");
} }
return newiip; return newiip;
} }

View File

@ -22,7 +22,6 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import com.google.common.collect.ImmutableList;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
@ -361,6 +360,10 @@ public class INodesInPath {
return path; return path;
} }
public byte[] getPathComponent(int i) {
return path[i];
}
/** @return the full path in string form */ /** @return the full path in string form */
public String getPath() { public String getPath() {
return DFSUtil.byteArray2PathString(path); return DFSUtil.byteArray2PathString(path);
@ -374,21 +377,6 @@ public class INodesInPath {
return DFSUtil.byteArray2PathString(path, 0, pos + 1); // it's a length... return DFSUtil.byteArray2PathString(path, 0, pos + 1); // it's a length...
} }
/**
* @param offset start endpoint (inclusive)
* @param length number of path components
* @return sub-list of the path
*/
public List<String> getPath(int offset, int length) {
Preconditions.checkArgument(offset >= 0 && length >= 0 && offset + length
<= path.length);
ImmutableList.Builder<String> components = ImmutableList.builder();
for (int i = offset; i < offset + length; i++) {
components.add(DFSUtil.bytes2String(path[i]));
}
return components.build();
}
public int length() { public int length() {
return inodes.length; return inodes.length;
} }
@ -429,22 +417,17 @@ public class INodesInPath {
} }
/** /**
* @return a new INodesInPath instance that only contains exisitng INodes. * @return a new INodesInPath instance that only contains existing INodes.
* Note that this method only handles non-snapshot paths. * Note that this method only handles non-snapshot paths.
*/ */
public INodesInPath getExistingINodes() { public INodesInPath getExistingINodes() {
Preconditions.checkState(!isSnapshot()); Preconditions.checkState(!isSnapshot());
int i = 0; for (int i = inodes.length; i > 0; i--) {
for (; i < inodes.length; i++) { if (inodes[i - 1] != null) {
if (inodes[i] == null) { return (i == inodes.length) ? this : getAncestorINodesInPath(i);
break;
} }
} }
INode[] existing = new INode[i]; return null;
byte[][] existingPath = new byte[i][];
System.arraycopy(inodes, 0, existing, 0, i);
System.arraycopy(path, 0, existingPath, 0, i);
return new INodesInPath(existing, existingPath, isRaw, false, snapshotId);
} }
/** /**

View File

@ -146,6 +146,11 @@ public class TestSnapshotPathINodes {
// The returned nodesInPath should be non-snapshot // The returned nodesInPath should be non-snapshot
assertSnapshot(nodesInPath, false, null, -1); assertSnapshot(nodesInPath, false, null, -1);
// verify components are correct
for (int i=0; i < components.length; i++) {
assertEquals(components[i], nodesInPath.getPathComponent(i));
}
// The last INode should be associated with file1 // The last INode should be associated with file1
assertTrue("file1=" + file1 + ", nodesInPath=" + nodesInPath, assertTrue("file1=" + file1 + ", nodesInPath=" + nodesInPath,
nodesInPath.getINode(components.length - 1) != null); nodesInPath.getINode(components.length - 1) != null);