HDFS-15574. Remove unnecessary sort of block list in DirectoryScanner. Contributed by Stephen O'Donnell.

This commit is contained in:
hemanthboyina 2020-09-17 10:15:18 +05:30
parent 875219bc8e
commit aa582ccc2a
8 changed files with 50 additions and 14 deletions

View File

@ -22,7 +22,6 @@ import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@ -405,9 +404,8 @@ public class DirectoryScanner implements Runnable {
diffs.put(bpid, diffRecord);
statsRecord.totalBlocks = blockpoolReport.length;
final List<ReplicaInfo> bl = dataset.getFinalizedBlocks(bpid);
Collections.sort(bl); // Sort based on blockId
final List<ReplicaInfo> bl = dataset.getSortedFinalizedBlocks(bpid);
int d = 0; // index for blockpoolReport
int m = 0; // index for memReprot
while (m < bl.size() && d < blockpoolReport.length) {

View File

@ -237,16 +237,17 @@ public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
VolumeFailureSummary getVolumeFailureSummary();
/**
* Gets a list of references to the finalized blocks for the given block pool.
* Gets a sorted list of references to the finalized blocks for the given
* block pool. The list is sorted by blockID.
* <p>
* Callers of this function should call
* {@link FsDatasetSpi#acquireDatasetLock} to avoid blocks' status being
* changed during list iteration.
* </p>
* @return a list of references to the finalized blocks for the given block
* pool.
* pool. The list is sorted by blockID.
*/
List<ReplicaInfo> getFinalizedBlocks(String bpid);
List<ReplicaInfo> getSortedFinalizedBlocks(String bpid);
/**
* Check whether the in-memory block record matches the block on the disk,

View File

@ -1920,17 +1920,18 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
}
/**
* Gets a list of references to the finalized blocks for the given block pool.
* Gets a list of references to the finalized blocks for the given block pool,
* sorted by blockID.
* <p>
* Callers of this function should call
* {@link FsDatasetSpi#acquireDatasetLock} to avoid blocks' status being
* changed during list iteration.
* </p>
* @return a list of references to the finalized blocks for the given block
* pool.
* pool. The list is sorted by blockID.
*/
@Override
public List<ReplicaInfo> getFinalizedBlocks(String bpid) {
public List<ReplicaInfo> getSortedFinalizedBlocks(String bpid) {
try (AutoCloseableLock lock = datasetLock.acquire()) {
final List<ReplicaInfo> finalized = new ArrayList<ReplicaInfo>(
volumeMap.size(bpid));

View File

@ -173,7 +173,7 @@ public class TestCrcCorruption {
final DataNode dn = cluster.getDataNodes().get(dnIdx);
final String bpid = cluster.getNamesystem().getBlockPoolId();
List<ReplicaInfo> replicas =
dn.getFSDataset().getFinalizedBlocks(bpid);
dn.getFSDataset().getSortedFinalizedBlocks(bpid);
assertTrue("Replicas do not exist", !replicas.isEmpty());
for (int idx = 0; idx < replicas.size(); idx++) {

View File

@ -540,7 +540,7 @@ public class TestReconstructStripedFile {
writeFile(fs, "/ec-xmits-weight", fileLen);
DataNode dn = cluster.getDataNodes().get(0);
int corruptBlocks = dn.getFSDataset().getFinalizedBlocks(
int corruptBlocks = dn.getFSDataset().getSortedFinalizedBlocks(
cluster.getNameNode().getNamesystem().getBlockPoolId()).size();
int expectedXmits = corruptBlocks * expectedWeight;

View File

@ -1502,7 +1502,7 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
}
@Override
public List<ReplicaInfo> getFinalizedBlocks(String bpid) {
public List<ReplicaInfo> getSortedFinalizedBlocks(String bpid) {
throw new UnsupportedOperationException();
}

View File

@ -90,7 +90,7 @@ public class ExternalDatasetImpl implements FsDatasetSpi<ExternalVolumeImpl> {
}
@Override
public List<ReplicaInfo> getFinalizedBlocks(String bpid) {
public List<ReplicaInfo> getSortedFinalizedBlocks(String bpid) {
return null;
}

View File

@ -80,6 +80,7 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import java.util.HashSet;
import java.util.List;
@ -470,6 +471,41 @@ public class TestFsDatasetImpl {
FsDatasetTestUtil.assertFileLockReleased(badDir.toString());
}
/**
 * Verifies that {@code getSortedFinalizedBlocks} returns the finalized
 * replicas of a block pool in ascending order.
 * <p>
 * This test is here primarily to catch any case where the datanode replica
 * map structure is changed to a new structure which is not sorted and hence
 * reading the blocks from it directly would not be sorted.
 * </p>
 *
 * @throws IOException if the mini cluster cannot be started or a replica
 *     cannot be created.
 */
@Test
public void testSortedFinalizedBlocksAreSorted() throws IOException {
  this.conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    cluster.waitActive();
    DataNode dn = cluster.getDataNodes().get(0);
    FsDatasetSpi<?> ds = DataNodeTestUtils.getFSDataset(dn);
    ds.addBlockPool(BLOCKPOOL, conf);

    // Load 1000 blocks with random blockIDs. A single generator is shared
    // across iterations instead of allocating a new one per block.
    final int numBlocks = 1000;
    final Random random = new Random();
    for (int i = 0; i < numBlocks; i++) {
      ExtendedBlock eb = new ExtendedBlock(
          BLOCKPOOL, random.nextInt(), 1000, 1000 + i);
      cluster.getFsDatasetTestUtils(0).createFinalizedReplica(eb);
    }

    // Get the sorted blocks and validate the arrayList is sorted
    List<ReplicaInfo> replicaList = ds.getSortedFinalizedBlocks(BLOCKPOOL);

    // Guard against a vacuous pass: with fewer than two entries the
    // pairwise comparison below would never execute.
    if (replicaList.size() < 2) {
      fail("Expected the finalized replicas to be returned, but got "
          + replicaList.size());
    }
    for (int i = 0; i < replicaList.size() - 1; i++) {
      if (replicaList.get(i).compareTo(replicaList.get(i + 1)) > 0) {
        // Not sorted so fail the test
        fail("ArrayList is not sorted, and it should be");
      }
    }
  } finally {
    cluster.shutdown();
  }
}
@Test
public void testDeletingBlocks() throws IOException {