HDFS-4649. Webhdfs cannot list large directories. Contributed by Daryn Sharp.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1463698 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Kihwal Lee 2013-04-02 19:11:56 +00:00
parent d40b154d27
commit 8293e22565
3 changed files with 85 additions and 16 deletions

View File

@ -2485,6 +2485,8 @@ Release 0.23.7 - UNRELEASED
HDFS-4581. checkDiskError should not be called on network errors (Rohit
Kochar via kihwal)
HDFS-4649. Webhdfs cannot list large directories (daryn via kihwal)
Release 0.23.6 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -718,9 +718,15 @@ public class NamenodeWebHdfsMethods {
private static StreamingOutput getListingStream(final NamenodeProtocols np,
final String p) throws IOException {
final DirectoryListing first = getDirectoryListing(np, p,
// allows exceptions like FNF or ACE to prevent http response of 200 for
// a failure since we can't (currently) return error responses in the
// middle of a streaming operation
final DirectoryListing firstDirList = getDirectoryListing(np, p,
HdfsFileStatus.EMPTY_NAME);
// must save ugi because the streaming object will be executed outside
// the remote user's ugi
final UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
return new StreamingOutput() {
@Override
public void write(final OutputStream outstream) throws IOException {
@ -729,21 +735,32 @@ public class NamenodeWebHdfsMethods {
out.println("{\"" + FileStatus.class.getSimpleName() + "es\":{\""
+ FileStatus.class.getSimpleName() + "\":[");
final HdfsFileStatus[] partial = first.getPartialListing();
if (partial.length > 0) {
out.print(JsonUtil.toJsonString(partial[0], false));
}
for(int i = 1; i < partial.length; i++) {
try {
// restore remote user's ugi
ugi.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws IOException {
long n = 0;
for (DirectoryListing dirList = firstDirList; ;
dirList = getDirectoryListing(np, p, dirList.getLastName())
) {
// send each segment of the directory listing
for (HdfsFileStatus s : dirList.getPartialListing()) {
if (n++ > 0) {
out.println(',');
out.print(JsonUtil.toJsonString(partial[i], false));
}
for(DirectoryListing curr = first; curr.hasMore(); ) {
curr = getDirectoryListing(np, p, curr.getLastName());
for(HdfsFileStatus s : curr.getPartialListing()) {
out.println(',');
out.print(JsonUtil.toJsonString(s, false));
}
// stop if last segment
if (!dirList.hasMore()) {
break;
}
}
return null;
}
});
} catch (InterruptedException e) {
throw new IOException(e);
}
out.println();

View File

@ -19,6 +19,8 @@
package org.apache.hadoop.hdfs.web;
import java.io.IOException;
import java.net.URISyntaxException;
import java.security.PrivilegedExceptionAction;
import java.util.Random;
import org.apache.commons.logging.Log;
@ -29,9 +31,13 @@ import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.TestDFSClientRetries;
import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.log4j.Level;
import org.junit.Assert;
import org.junit.Test;
@ -208,4 +214,48 @@ public class TestWebHDFS {
final Configuration conf = WebHdfsTestUtil.createConf();
TestDFSClientRetries.namenodeRestartTest(conf, true);
}
@Test(timeout=300000)
public void testLargeDirectory() throws Exception {
final Configuration conf = WebHdfsTestUtil.createConf();
final int listLimit = 2;
// force small chunking of directory listing
conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, listLimit);
// force paths to be only owner-accessible to ensure ugi isn't changing
// during listStatus
FsPermission.setUMask(conf, new FsPermission((short)0077));
final MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
try {
cluster.waitActive();
WebHdfsTestUtil.getWebHdfsFileSystem(conf).setPermission(
new Path("/"),
new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
// trick the NN into not believing it's not the superuser so we can
// tell if the correct user is used by listStatus
UserGroupInformation.setLoginUser(
UserGroupInformation.createUserForTesting(
"not-superuser", new String[]{"not-supergroup"}));
UserGroupInformation.createUserForTesting("me", new String[]{"my-group"})
.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws IOException, URISyntaxException {
FileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf);
Path d = new Path("/my-dir");
Assert.assertTrue(fs.mkdirs(d));
for (int i=0; i < listLimit*3; i++) {
Path p = new Path(d, "file-"+i);
Assert.assertTrue(fs.createNewFile(p));
}
Assert.assertEquals(listLimit*3, fs.listStatus(d).length);
return null;
}
});
} finally {
cluster.shutdown();
}
}
}