From 2848a27700e76fc2c2d3ab87adba54ca8e93ba39 Mon Sep 17 00:00:00 2001 From: Mingliang Liu Date: Fri, 28 Oct 2016 11:11:31 -0700 Subject: [PATCH] HDFS-11047. Remove deep copies of FinalizedReplica to alleviate heap consumption on DataNode. Contributed by Xiaobing Zhou --- .../hdfs/server/datanode/DirectoryScanner.java | 15 +++++++-------- .../server/datanode/fsdataset/FsDatasetSpi.java | 11 ++++++++++- .../datanode/fsdataset/impl/FsDatasetImpl.java | 13 ++++++++++--- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index 421d067a3c9..e44f5edab13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -596,14 +596,13 @@ public class DirectoryScanner implements Runnable { diffs.put(bpid, diffRecord); statsRecord.totalBlocks = blockpoolReport.length; - List bl = dataset.getFinalizedBlocks(bpid); - FinalizedReplica[] memReport = bl.toArray(new FinalizedReplica[bl.size()]); - Arrays.sort(memReport); // Sort based on blockId - + final List bl = dataset.getFinalizedBlocks(bpid); + Collections.sort(bl); // Sort based on blockId + int d = 0; // index for blockpoolReport int m = 0; // index for memReprot - while (m < memReport.length && d < blockpoolReport.length) { - FinalizedReplica memBlock = memReport[m]; + while (m < bl.size() && d < blockpoolReport.length) { + FinalizedReplica memBlock = bl.get(m); ScanInfo info = blockpoolReport[d]; if (info.getBlockId() < memBlock.getBlockId()) { if (!dataset.isDeletingBlock(bpid, info.getBlockId())) { @@ -650,8 +649,8 @@ public class DirectoryScanner implements Runnable { ++m; } } - while (m < memReport.length) { - 
FinalizedReplica current = memReport[m++]; + while (m < bl.size()) { + FinalizedReplica current = bl.get(m++); addDifference(diffRecord, statsRecord, current.getBlockId(), current.getVolume()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java index ac3c5b4c56a..19f580abc37 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java @@ -230,7 +230,16 @@ public interface FsDatasetSpi extends FSDatasetMBean { */ VolumeFailureSummary getVolumeFailureSummary(); - /** @return a list of finalized blocks for the given block pool. */ + /** + * Gets a list of references to the finalized blocks for the given block pool. + *

<p> + * Callers of this function should call + * {@link FsDatasetSpi#acquireDatasetLock} to avoid blocks' status being + * changed during list iteration. + * </p>

+ * @return a list of references to the finalized blocks for the given block + * pool. + */ List getFinalizedBlocks(String bpid); /** @return a list of finalized blocks for the given block pool. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 42256adbe5d..834e6cbe6b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -1842,16 +1842,23 @@ class FsDatasetImpl implements FsDatasetSpi { } /** - * Get the list of finalized blocks from in-memory blockmap for a block pool. + * Gets a list of references to the finalized blocks for the given block pool. + *

<p> + * Callers of this function should call + * {@link FsDatasetSpi#acquireDatasetLock} to avoid blocks' status being + * changed during list iteration. + * </p>

+ * @return a list of references to the finalized blocks for the given block + * pool. */ @Override public List getFinalizedBlocks(String bpid) { try(AutoCloseableLock lock = datasetLock.acquire()) { - ArrayList finalized = + final ArrayList finalized = new ArrayList(volumeMap.size(bpid)); for (ReplicaInfo b : volumeMap.replicas(bpid)) { if (b.getState() == ReplicaState.FINALIZED) { - finalized.add(new FinalizedReplica((FinalizedReplica) b)); + finalized.add((FinalizedReplica)b); } } return finalized;