From 6a23c376c9d0528e0639dcb2c396f8adb95ec164 Mon Sep 17 00:00:00 2001 From: zhangshuyan <81411509+zhangshuyan0@users.noreply.github.com> Date: Mon, 24 Apr 2023 18:53:25 +0800 Subject: [PATCH] HDFS-16986. EC: Fix locationBudget in getListing(). (#5582). Contributed by Shuyan Zhang. Signed-off-by: Ayush Saxena Signed-off-by: He Xiaoqiao --- .../namenode/FSDirStatAndListingOp.java | 25 +++++++---- .../hdfs/TestDistributedFileSystem.java | 41 +++++++++++++++++++ 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java index 236e308f410..4547228364d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java @@ -262,13 +262,24 @@ class FSDirStatAndListingOp { needLocation, false); listingCnt++; if (listing[i] instanceof HdfsLocatedFileStatus) { - // Once we hit lsLimit locations, stop. - // This helps to prevent excessively large response payloads. - // Approximate #locations with locatedBlockCount() * repl_factor - LocatedBlocks blks = - ((HdfsLocatedFileStatus)listing[i]).getLocatedBlocks(); - locationBudget -= (blks == null) ? 0 : - blks.locatedBlockCount() * listing[i].getReplication(); + // Once we hit lsLimit locations, stop. + // This helps to prevent excessively large response payloads. + LocatedBlocks blks = + ((HdfsLocatedFileStatus) listing[i]).getLocatedBlocks(); + if (blks != null) { + ErasureCodingPolicy ecPolicy = listing[i].getErasureCodingPolicy(); + if (ecPolicy != null && !ecPolicy.isReplicationPolicy()) { + // Approximate #locations with locatedBlockCount() * + // internalBlocksNum. + locationBudget -= blks.locatedBlockCount() * + (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits()); + } else { + // Approximate #locations with locatedBlockCount() * + // replicationFactor. + locationBudget -= + blks.locatedBlockCount() * listing[i].getReplication(); + } + } } } // truncate return array if necessary diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index 2773214f45d..9e8c11d7b06 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -29,6 +29,9 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; @@ -674,6 +677,44 @@ public class TestDistributedFileSystem { } } + /** + * This is to test that {@link DFSConfigKeys#DFS_LIST_LIMIT} works as + * expected when {@link DistributedFileSystem#listLocatedStatus} is called. + */ + @Test + public void testGetListingLimit() throws Exception { + final Configuration conf = getTestConfiguration(); + conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9); + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(9).build()) { + cluster.waitActive(); + ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy(); + final DistributedFileSystem fs = cluster.getFileSystem(); + fs.dfs = spy(fs.dfs); + Path dir1 = new Path("/testRep"); + Path dir2 = new Path("/testEC"); + fs.mkdirs(dir1); + fs.mkdirs(dir2); + fs.setErasureCodingPolicy(dir2, ecPolicy.getName()); + for (int i = 0; i < 3; i++) { + DFSTestUtil.createFile(fs, new Path(dir1, String.valueOf(i)), + 20 * 1024L, (short) 3, 1); + DFSTestUtil.createStripedFile(cluster, new Path(dir2, + String.valueOf(i)), dir2, 1, 1, false); + } + + List str = RemoteIterators.toList(fs.listLocatedStatus(dir1)); + assertThat(str).hasSize(3); + Mockito.verify(fs.dfs, Mockito.times(1)).listPaths(anyString(), any(), + anyBoolean()); + + str = RemoteIterators.toList(fs.listLocatedStatus(dir2)); + assertThat(str).hasSize(3); + Mockito.verify(fs.dfs, Mockito.times(4)).listPaths(anyString(), any(), + anyBoolean()); + } + } + @Test public void testStatistics() throws IOException { FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME,