From 95e3ef2ad40dfd1ecb849a6620fcc85e3257165f Mon Sep 17 00:00:00 2001
From: Kihwal Lee
Date: Thu, 23 Jan 2014 18:13:17 +0000
Subject: [PATCH] svn merge -c 1560750 merging from trunk to branch-2 to
 fix:HDFS-5788. listLocatedStatus response can be very large. Contributed
 by Nathan Roberts.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1560778 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 +
 .../hdfs/server/namenode/FSDirectory.java     | 26 ++++++-
 .../hdfs/server/namenode/TestINodeFile.java   | 72 ++++++++++++++++++-
 3 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 99487004cfa..77252a54c90 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -212,6 +212,9 @@ Release 2.4.0 - UNRELEASED
     HDFS-5434. Change block placement policy constructors from package private
     to protected. (Buddy Taylor via Arpit Agarwal)
 
+    HDFS-5788. listLocatedStatus response can be very large. (Nathan Roberts
+    via kihwal)
+
   OPTIMIZATIONS
 
     HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index 04edeab7977..8b9fad1a548 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -171,7 +171,6 @@ public class FSDirectory implements Closeable {
         DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
     this.lsLimit = configuredLimit>0 ?
         configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
-
     this.contentCountLimit = conf.getInt(
         DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY,
         DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT);
@@ -1532,6 +1531,11 @@
   /**
    * Get a partial listing of the indicated directory
    *
+   * We will stop when any of the following conditions is met:
+   * 1) this.lsLimit files have been added
+   * 2) needLocation is true AND enough files have been added such
+   * that at least this.lsLimit block locations are in the response
+   *
    * @param src the directory name
    * @param startAfter the name to start listing after
    * @param needLocation if block locations are returned
@@ -1563,14 +1567,30 @@
       int startChild = INodeDirectory.nextChild(contents, startAfter);
       int totalNumChildren = contents.size();
       int numOfListing = Math.min(totalNumChildren-startChild, this.lsLimit);
+      int locationBudget = this.lsLimit;
+      int listingCnt = 0;
       HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
-      for (int i=0; i<numOfListing; i++) {
+      for (int i=0; i<numOfListing && locationBudget>0; i++) {
         INode cur = contents.get(startChild+i);
         listing[i] = createFileStatus(cur.getLocalNameBytes(), cur,
             needLocation, snapshot);
+        listingCnt++;
+        if (needLocation) {
+          // Once we hit lsLimit locations, stop.
+          // This helps to prevent excessively large response payloads.
+          // Approximate #locations with locatedBlockCount() * repl_factor
+          LocatedBlocks blks =
+              ((HdfsLocatedFileStatus)listing[i]).getBlockLocations();
+          locationBudget -= (blks == null) ? 0 :
+              blks.locatedBlockCount() * listing[i].getReplication();
+        }
       }
+      // truncate return array if necessary
+      if (listingCnt < numOfListing) {
+        listing = Arrays.copyOf(listing, listingCnt);
+      }
       return new DirectoryListing(
-          listing, totalNumChildren-startChild-numOfListing);
+          listing, totalNumChildren-startChild-listingCnt);
     } finally {
       readUnlock();
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java
index 56596341244..18949be835c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java
@@ -25,6 +25,7 @@ import static org.junit.Assert.fail;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
@@ -931,7 +932,76 @@ public class TestINodeFile {
       }
     }
   }
-  
+
+  @Test
+  public void testLocationLimitInListingOps() throws Exception {
+    final Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9); // 3 blocks * 3 replicas
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+      cluster.waitActive();
+      final DistributedFileSystem hdfs = cluster.getFileSystem();
+      ArrayList<String> source = new ArrayList<String>();
+
+      // tmp1 holds files with 3 blocks, 3 replicas
+      // tmp2 holds files with 3 blocks, 1 replica
+      hdfs.mkdirs(new Path("/tmp1"));
+      hdfs.mkdirs(new Path("/tmp2"));
+
+      source.add("f1");
+      source.add("f2");
+
+      int numEntries = source.size();
+      for (int j=0;j<numEntries;j++) {
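
Note on the FSDirectory change above: the loop in getListing() now stops as soon as the entries already added account for at least lsLimit block locations, and the remaining-entries count returned to the client is derived from listingCnt, so the partial-listing cursor stays correct when a page is cut short. Below is a minimal, self-contained model of that budget loop, not code from the patch; the Entry class and the sample numbers are hypothetical stand-ins, whereas the real code budgets lsLimit locations and charges locatedBlockCount() * getReplication() per file.

    import java.util.Arrays;

    public class LocationBudgetSketch {
      // Hypothetical stand-in for one directory entry.
      static final class Entry {
        final int blocks, replication;
        Entry(int blocks, int replication) {
          this.blocks = blocks;
          this.replication = replication;
        }
      }

      // Mirrors the patched loop: keep admitting entries while budget > 0,
      // so the entry that exhausts the budget is still included.
      static int listWithBudget(Entry[] children, int budget) {
        int listingCnt = 0;
        for (int i = 0; i < children.length && budget > 0; i++) {
          listingCnt++;
          budget -= children[i].blocks * children[i].replication;
        }
        return listingCnt;
      }

      public static void main(String[] args) {
        // 2 files of 3 blocks x 3 replicas against a budget of 9 locations:
        // one file per page, matching the first half of the new test.
        Entry[] threeByThree = new Entry[2];
        Arrays.fill(threeByThree, new Entry(3, 3));
        System.out.println(listWithBudget(threeByThree, 9)); // prints 1

        // 6 files of 3 blocks x 1 replica, budget 9: three files per page.
        Entry[] threeByOne = new Entry[6];
        Arrays.fill(threeByOne, new Entry(3, 1));
        System.out.println(listWithBudget(threeByOne, 9)); // prints 3
      }
    }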
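One consequence worth noting for callers: with needLocation=true a response may now carry fewer than dfs.ls.limit entries, so clients must page with the DirectoryListing cursor rather than assume a fixed page size, which is exactly what the new test does. A hedged sketch of such a consumer follows; the ListingPager class and countEntries helper are assumptions for illustration, while ClientProtocol.getListing, DirectoryListing.hasMore(), getLastName(), and HdfsFileStatus.EMPTY_NAME are the existing HDFS client-protocol APIs.

    import java.io.IOException;

    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
    import org.apache.hadoop.hdfs.protocol.DirectoryListing;
    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;

    public class ListingPager {
      // Counts entries under src by following partial-listing cursors,
      // tolerating the shorter pages the location budget can produce.
      static int countEntries(ClientProtocol namenode, String src)
          throws IOException {
        int n = 0;
        byte[] startAfter = HdfsFileStatus.EMPTY_NAME;
        DirectoryListing dl;
        do {
          dl = namenode.getListing(src, startAfter, true); // needLocation
          if (dl == null) {
            break; // src was deleted while paging
          }
          n += dl.getPartialListing().length;
          startAfter = dl.getLastName(); // cursor for the next page
        } while (dl.hasMore());
        return n;
      }
    }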