From b5d2b0a121da27a032a04751d23146bf2642aab5 Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Wed, 19 Jun 2013 00:40:22 +0000 Subject: [PATCH] HDFS-4461. DirectoryScanner: volume prefix takes up memory for every block that is scanned (Colin Patrick McCabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1494403 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/datanode/DirectoryScanner.java | 102 ++++++++++++++++-- .../datanode/fsdataset/FsVolumeSpi.java | 3 + .../datanode/fsdataset/impl/FsVolumeImpl.java | 5 + .../server/datanode/TestDirectoryScanner.java | 89 +++++++++++++++ 5 files changed, 192 insertions(+), 10 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 6b6397f5789..4e274766d13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -79,6 +79,9 @@ Release 2.1.0-beta - UNRELEASED IMPROVEMENTS + HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every + block that is scanned (Colin Patrick McCabe) + HDFS-4222. NN is unresponsive and loses heartbeats from DNs when configured to use LDAP and LDAP has issues. 
(Xiaobo Peng, suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index 5d870d771e8..92f5d63dc2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -33,6 +33,8 @@ import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -154,30 +156,109 @@ public class DirectoryScanner implements Runnable { * Tracks the files and other information related to a block on the disk * Missing file is indicated by setting the corresponding member * to null. + * + * Because millions of these structures may be created, we try to save + * memory here. So instead of storing full paths, we store path suffixes. + * The block file, if it exists, will have a path like this: + * <volume_base_path>/<block_path> + * So we don't need to store the volume path, since we already know what the + * volume is. + * + * The metadata file, if it exists, will have a path like this: + * <volume_base_path>/<block_path>_<genstamp>.meta + * So if we have a block file, there isn't any need to store the block path + * again. + * + * The accessor functions take care of these manipulations. */ static class ScanInfo implements Comparable { private final long blockId; - private final File metaFile; - private final File blockFile; + + /** + * The block file path, relative to the volume's base directory. + * If there was no block file found, this may be null. If 'vol' + * is null, then this is the full path of the block file. 
+ */ + private final String blockSuffix; + + /** + * The suffix of the meta file path relative to the block file. + * If blockSuffix is null, then this will be the entire path relative + * to the volume base directory, or an absolute path if vol is also + * null. + */ + private final String metaSuffix; + private final FsVolumeSpi volume; + private final static Pattern CONDENSED_PATH_REGEX = + Pattern.compile("(?