HDFS-4649. Webhdfs cannot list large directories. Contributed by Daryn Sharp.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1463698 13f79535-47bb-0310-9956-ffa450edef68
commit 8293e22565
parent d40b154d27
@@ -2485,6 +2485,8 @@ Release 0.23.7 - UNRELEASED
     HDFS-4581. checkDiskError should not be called on network errors (Rohit
     Kochar via kihwal)
 
+    HDFS-4649. Webhdfs cannot list large directories (daryn via kihwal)
+
 Release 0.23.6 - UNRELEASED
 
   INCOMPATIBLE CHANGES

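The heart of the fix, visible in the NamenodeWebHdfsMethods hunks below, is that the StreamingOutput rendering the listing runs after the JAX-RS resource method has already returned, outside the remote caller's security context. The patch therefore fetches the first segment eagerly (so permission or not-found errors still surface before an HTTP 200 is committed), captures the caller's UserGroupInformation, and replays the remaining segments inside ugi.doAs(). Below is a minimal, self-contained sketch of that capture-and-replay pattern using only the UserGroupInformation API; the class name UgiReplaySketch is illustrative and not part of the patch.

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

public class UgiReplaySketch {
  public static void main(String[] args) throws Exception {
    // 1) capture the caller's identity while still in its context
    final UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    // 2) later, possibly on another thread, replay work under that identity
    String who = ugi.doAs(new PrivilegedExceptionAction<String>() {
      @Override
      public String run() throws IOException {
        // filesystem / RPC calls made here see the captured user
        return UserGroupInformation.getCurrentUser().getUserName();
      }
    });
    System.out.println("work ran as: " + who);
  }
}
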
@@ -718,9 +718,15 @@ public class NamenodeWebHdfsMethods {
 
   private static StreamingOutput getListingStream(final NamenodeProtocols np,
       final String p) throws IOException {
-    final DirectoryListing first = getDirectoryListing(np, p,
+    // allows exceptions like FNF or ACE to prevent http response of 200 for
+    // a failure since we can't (currently) return error responses in the
+    // middle of a streaming operation
+    final DirectoryListing firstDirList = getDirectoryListing(np, p,
         HdfsFileStatus.EMPTY_NAME);
 
+    // must save ugi because the streaming object will be executed outside
+    // the remote user's ugi
+    final UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
     return new StreamingOutput() {
       @Override
       public void write(final OutputStream outstream) throws IOException {
@@ -729,21 +735,32 @@ public class NamenodeWebHdfsMethods {
         out.println("{\"" + FileStatus.class.getSimpleName() + "es\":{\""
             + FileStatus.class.getSimpleName() + "\":[");
 
-        final HdfsFileStatus[] partial = first.getPartialListing();
-        if (partial.length > 0) {
-          out.print(JsonUtil.toJsonString(partial[0], false));
-        }
-        for(int i = 1; i < partial.length; i++) {
-          out.println(',');
-          out.print(JsonUtil.toJsonString(partial[i], false));
-        }
-
-        for(DirectoryListing curr = first; curr.hasMore(); ) {
-          curr = getDirectoryListing(np, p, curr.getLastName());
-          for(HdfsFileStatus s : curr.getPartialListing()) {
-            out.println(',');
-            out.print(JsonUtil.toJsonString(s, false));
-          }
+        try {
+          // restore remote user's ugi
+          ugi.doAs(new PrivilegedExceptionAction<Void>() {
+            @Override
+            public Void run() throws IOException {
+              long n = 0;
+              for (DirectoryListing dirList = firstDirList; ;
+                   dirList = getDirectoryListing(np, p, dirList.getLastName())
+                  ) {
+                // send each segment of the directory listing
+                for (HdfsFileStatus s : dirList.getPartialListing()) {
+                  if (n++ > 0) {
+                    out.println(',');
+                  }
+                  out.print(JsonUtil.toJsonString(s, false));
+                }
+                // stop if last segment
+                if (!dirList.hasMore()) {
+                  break;
+                }
+              }
+              return null;
+            }
+          });
+        } catch (InterruptedException e) {
+          throw new IOException(e);
         }
 
         out.println();

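For reference, the loop added above pages through the directory using DirectoryListing as a cursor: each segment carries a partial array of HdfsFileStatus plus the last returned name, which is handed back as the start-after key for the next call until hasMore() is false, and the entries are streamed out as one {"FileStatuses":{"FileStatus":[...]}} JSON object. Below is a hedged, client-side sketch of the same paging contract against ClientProtocol; the PagedListingSketch and listAll names are illustrative, and an existing ClientProtocol proxy is assumed rather than constructed here.

import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;

public class PagedListingSketch {
  /** Walk every entry of src in dfs.ls.limit sized segments. */
  static void listAll(ClientProtocol namenode, String src) throws IOException {
    DirectoryListing page = namenode.getListing(src, HdfsFileStatus.EMPTY_NAME, false);
    long count = 0;
    while (true) {
      for (HdfsFileStatus status : page.getPartialListing()) {
        count++;                      // placeholder for real per-entry handling
      }
      if (!page.hasMore()) {
        break;                        // last segment reached
      }
      // resume after the last name returned by the previous segment
      page = namenode.getListing(src, page.getLastName(), false);
    }
    System.out.println(src + " has " + count + " entries");
  }
}
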
@@ -19,6 +19,8 @@
 package org.apache.hadoop.hdfs.web;
 
 import java.io.IOException;
+import java.net.URISyntaxException;
+import java.security.PrivilegedExceptionAction;
 import java.util.Random;
 
 import org.apache.commons.logging.Log;
@@ -29,9 +31,13 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.TestDFSClientRetries;
+import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.log4j.Level;
 import org.junit.Assert;
 import org.junit.Test;
@@ -208,4 +214,48 @@ public class TestWebHDFS {
     final Configuration conf = WebHdfsTestUtil.createConf();
     TestDFSClientRetries.namenodeRestartTest(conf, true);
   }
+
+  @Test(timeout=300000)
+  public void testLargeDirectory() throws Exception {
+    final Configuration conf = WebHdfsTestUtil.createConf();
+    final int listLimit = 2;
+    // force small chunking of directory listing
+    conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, listLimit);
+    // force paths to be only owner-accessible to ensure ugi isn't changing
+    // during listStatus
+    FsPermission.setUMask(conf, new FsPermission((short)0077));
+
+    final MiniDFSCluster cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    try {
+      cluster.waitActive();
+      WebHdfsTestUtil.getWebHdfsFileSystem(conf).setPermission(
+          new Path("/"),
+          new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
+
+      // trick the NN into not believing it's the superuser so we can
+      // tell if the correct user is used by listStatus
+      UserGroupInformation.setLoginUser(
+          UserGroupInformation.createUserForTesting(
+              "not-superuser", new String[]{"not-supergroup"}));
+
+      UserGroupInformation.createUserForTesting("me", new String[]{"my-group"})
+        .doAs(new PrivilegedExceptionAction<Void>() {
+          @Override
+          public Void run() throws IOException, URISyntaxException {
+            FileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf);
+            Path d = new Path("/my-dir");
+            Assert.assertTrue(fs.mkdirs(d));
+            for (int i=0; i < listLimit*3; i++) {
+              Path p = new Path(d, "file-"+i);
+              Assert.assertTrue(fs.createNewFile(p));
+            }
+            Assert.assertEquals(listLimit*3, fs.listStatus(d).length);
+            return null;
+          }
+        });
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
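The new test shrinks DFSConfigKeys.DFS_LIST_LIMIT to 2 so a six-file directory must come back in several segments, and uses a 077 umask so the entries are readable only by the creating user "me"; if any segment were fetched under the server's login user instead of the remote caller, the permission check would fail and the listing would not complete. From the client side the fix is invisible: an ordinary listStatus() over webhdfs simply returns the whole directory. A hedged usage sketch follows; the NameNode host, port 50070, and /big-dir are placeholders, not values from the patch.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WebHdfsListSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // webhdfs:// URI; host and port are placeholders for a real NameNode
    FileSystem fs = FileSystem.get(new URI("webhdfs://namenode.example.com:50070"), conf);
    FileStatus[] entries = fs.listStatus(new Path("/big-dir"));
    // with the fix, the count matches the full directory even when it is
    // larger than dfs.ls.limit
    System.out.println("entries: " + entries.length);
  }
}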