HDFS-16986. EC: Fix locationBudget in getListing(). (#5582). Contributed by Shuyan Zhang.

Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
zhangshuyan 2023-04-24 18:53:25 +08:00 committed by GitHub
parent 5b23224970
commit 6a23c376c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 59 additions and 7 deletions

View File

@ -262,13 +262,24 @@ class FSDirStatAndListingOp {
needLocation, false);
listingCnt++;
if (listing[i] instanceof HdfsLocatedFileStatus) {
// Once we hit lsLimit locations, stop.
// This helps to prevent excessively large response payloads.
// Approximate #locations with locatedBlockCount() * repl_factor
LocatedBlocks blks =
((HdfsLocatedFileStatus)listing[i]).getLocatedBlocks();
locationBudget -= (blks == null) ? 0 :
blks.locatedBlockCount() * listing[i].getReplication();
// Once we hit lsLimit locations, stop.
// This helps to prevent excessively large response payloads.
LocatedBlocks blks =
((HdfsLocatedFileStatus) listing[i]).getLocatedBlocks();
if (blks != null) {
ErasureCodingPolicy ecPolicy = listing[i].getErasureCodingPolicy();
if (ecPolicy != null && !ecPolicy.isReplicationPolicy()) {
// Approximate #locations with locatedBlockCount() *
// internalBlocksNum.
locationBudget -= blks.locatedBlockCount() *
(ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
} else {
// Approximate #locations with locatedBlockCount() *
// replicationFactor.
locationBudget -=
blks.locatedBlockCount() * listing[i].getReplication();
}
}
}
}
// truncate return array if necessary

View File

@ -29,6 +29,9 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
@ -674,6 +677,44 @@ public class TestDistributedFileSystem {
}
}
/**
* This is to test that {@link DFSConfigKeys#DFS_LIST_LIMIT} works as
* expected when {@link DistributedFileSystem#listLocatedStatus} is called.
*/
@Test
public void testGetListingLimit() throws Exception {
final Configuration conf = getTestConfiguration();
conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9);
try (MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(9).build()) {
cluster.waitActive();
ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy();
final DistributedFileSystem fs = cluster.getFileSystem();
fs.dfs = spy(fs.dfs);
Path dir1 = new Path("/testRep");
Path dir2 = new Path("/testEC");
fs.mkdirs(dir1);
fs.mkdirs(dir2);
fs.setErasureCodingPolicy(dir2, ecPolicy.getName());
for (int i = 0; i < 3; i++) {
DFSTestUtil.createFile(fs, new Path(dir1, String.valueOf(i)),
20 * 1024L, (short) 3, 1);
DFSTestUtil.createStripedFile(cluster, new Path(dir2,
String.valueOf(i)), dir2, 1, 1, false);
}
List<LocatedFileStatus> str = RemoteIterators.toList(fs.listLocatedStatus(dir1));
assertThat(str).hasSize(3);
Mockito.verify(fs.dfs, Mockito.times(1)).listPaths(anyString(), any(),
anyBoolean());
str = RemoteIterators.toList(fs.listLocatedStatus(dir2));
assertThat(str).hasSize(3);
Mockito.verify(fs.dfs, Mockito.times(4)).listPaths(anyString(), any(),
anyBoolean());
}
}
@Test
public void testStatistics() throws IOException {
FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME,