HDFS-4461. DirectoryScanner: volume prefix takes up memory for every block that is scanned (Colin Patrick McCabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1494401 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Colin McCabe 2013-06-19 00:34:08 +00:00
parent ab647cfcdb
commit 57006e1c8c
5 changed files with 192 additions and 10 deletions

View File

@ -290,6 +290,9 @@ Release 2.1.0-beta - UNRELEASED
IMPROVEMENTS IMPROVEMENTS
HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every
block that is scanned (Colin Patrick McCabe)
HDFS-4222. NN is unresponsive and loses heartbeats from DNs when HDFS-4222. NN is unresponsive and loses heartbeats from DNs when
configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh) configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh)

View File

@ -33,6 +33,8 @@ import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -154,30 +156,109 @@ public class DirectoryScanner implements Runnable {
* Tracks the files and other information related to a block on the disk * Tracks the files and other information related to a block on the disk
* Missing file is indicated by setting the corresponding member * Missing file is indicated by setting the corresponding member
* to null. * to null.
*
* Because millions of these structures may be created, we try to save
* memory here. So instead of storing full paths, we store path suffixes.
* The block file, if it exists, will have a path like this:
* <volume_base_path>/<block_path>
* So we don't need to store the volume path, since we already know what the
* volume is.
*
* The metadata file, if it exists, will have a path like this:
* <volume_base_path>/<block_path>_<genstamp>.meta
* So if we have a block file, there isn't any need to store the block path
* again.
*
* The accessor functions take care of these manipulations.
*/ */
static class ScanInfo implements Comparable<ScanInfo> { static class ScanInfo implements Comparable<ScanInfo> {
private final long blockId; private final long blockId;
private final File metaFile;
private final File blockFile; /**
* The block file path, relative to the volume's base directory.
* If there was no block file found, this may be null. If 'vol'
* is null, then this is the full path of the block file.
*/
private final String blockSuffix;
/**
* The suffix of the meta file path relative to the block file.
* If blockSuffix is null, then this will be the entire path relative
* to the volume base directory, or an absolute path if vol is also
* null.
*/
private final String metaSuffix;
private final FsVolumeSpi volume; private final FsVolumeSpi volume;
/**
 * Matches a run of two or more slash or backslash characters. The
 * negative lookbehind keeps a match from starting at the very first
 * character of the input, so that character is never rewritten.
 */
private final static Pattern CONDENSED_PATH_REGEX =
Pattern.compile("(?<!^)(\\\\|/){2,}");
/**
 * The platform file separator, quoted so it can be passed to
 * Matcher#replaceAll as a literal replacement string.
 */
private final static String QUOTED_FILE_SEPARATOR =
Matcher.quoteReplacement(File.separator);
/**
 * Collapse repeated path separators in a path string.
 *
 * Each run of two or more slash or backslash characters is replaced by a
 * single {@link File#separator}; on a platform whose separator is a
 * slash, /foo//bar becomes /foo/bar. A match can never start at the first
 * character of the string, so that character is always kept as-is.
 *
 * This is plain string manipulation: unlike
 * {@link File#getCanonicalPath()}, it never performs I/O on the
 * filesystem.
 *
 * @param path The path to condense.
 *
 * @return The path with duplicate separators collapsed.
 */
private static String getCondensedPath(String path) {
  final Matcher separatorRuns = CONDENSED_PATH_REGEX.matcher(path);
  return separatorRuns.replaceAll(QUOTED_FILE_SEPARATOR);
}
/**
 * Strip a known prefix from the condensed absolute path of a file.
 *
 * @param f The file whose path is being shortened.
 * @param prefix The leading part of the path to remove. It is compared
 *               against the condensed absolute path of f. It must not be
 *               null: String#startsWith throws NullPointerException for
 *               a null argument.
 *
 * @return The remainder of the path, such that prefix + suffix is the
 *         condensed absolute path of f.
 *
 * @throws RuntimeException if the condensed absolute path of f does not
 *                          start with prefix.
 */
private static String getSuffix(File f, String prefix) {
  final String condensed = getCondensedPath(f.getAbsolutePath());
  if (!condensed.startsWith(prefix)) {
    throw new RuntimeException(prefix + " is not a prefix of " + condensed);
  }
  return condensed.substring(prefix.length());
}
ScanInfo(long blockId) { ScanInfo(long blockId) {
this(blockId, null, null, null); this(blockId, null, null, null);
} }
ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) { ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
this.blockId = blockId; this.blockId = blockId;
this.metaFile = metaFile; String condensedVolPath = vol == null ? null :
this.blockFile = blockFile; getCondensedPath(vol.getBasePath());
this.blockSuffix = blockFile == null ? null :
getSuffix(blockFile, condensedVolPath);
if (metaFile == null) {
this.metaSuffix = null;
} else if (blockFile == null) {
this.metaSuffix = getSuffix(metaFile, condensedVolPath);
} else {
this.metaSuffix = getSuffix(metaFile,
condensedVolPath + blockSuffix);
}
this.volume = vol; this.volume = vol;
} }
File getMetaFile() { File getBlockFile() {
return metaFile; return (blockSuffix == null) ? null :
new File(volume.getBasePath(), blockSuffix);
} }
File getBlockFile() { File getMetaFile() {
return blockFile; if (metaSuffix == null) {
return null;
} else if (blockSuffix == null) {
return new File(volume.getBasePath(), metaSuffix);
} else {
return new File(volume.getBasePath(), blockSuffix + metaSuffix);
}
} }
long getBlockId() { long getBlockId() {
@ -216,7 +297,8 @@ public class DirectoryScanner implements Runnable {
} }
public long getGenStamp() { public long getGenStamp() {
return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) : return metaSuffix != null ? Block.getGenerationStamp(
getMetaFile().getName()) :
GenerationStamp.GRANDFATHER_GENERATION_STAMP; GenerationStamp.GRANDFATHER_GENERATION_STAMP;
} }
} }

View File

@ -30,6 +30,9 @@ public interface FsVolumeSpi {
/** @return the available storage space in bytes. */ /** @return the available storage space in bytes. */
public long getAvailable() throws IOException; public long getAvailable() throws IOException;
/**
 * @return the base path to the volume. DirectoryScanner.ScanInfo strips
 *         this prefix from the block and meta file paths it is given, so
 *         those files are expected to live underneath it.
 */
public String getBasePath();
/** @return the path to the volume */ /** @return the path to the volume */
public String getPath(String bpid) throws IOException; public String getPath(String bpid) throws IOException;

View File

@ -124,6 +124,11 @@ class FsVolumeImpl implements FsVolumeSpi {
return bp; return bp;
} }
/**
 * Returns the parent of currentDir as the base path of this volume.
 *
 * NOTE(review): presumably currentDir is the volume's "current"
 * directory, making its parent the volume root -- confirm against where
 * the field is initialized.
 */
@Override
public String getBasePath() {
return currentDir.getParent();
}
@Override @Override
public String getPath(String bpid) throws IOException { public String getPath(String bpid) throws IOException {
return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath(); return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath();

View File

@ -380,4 +380,93 @@ public class TestDirectoryScanner {
assertNotNull(memBlock); assertNotNull(memBlock);
assertEquals(genStamp, memBlock.getGenerationStamp()); assertEquals(genStamp, memBlock.getGenerationStamp());
} }
/**
 * Stub volume for the ScanInfo tests. Every method returns a fixed value
 * or a path built from string constants rooted at /base; nothing here
 * reads the disk.
 */
private static class TestFsVolumeSpi implements FsVolumeSpi {
  /** Base directory reported by this stub. */
  private static final String BASE_DIR = "/base";
  /** Prefix shared by every per-block-pool path reported by this stub. */
  private static final String CURRENT_DIR_PREFIX = "/base/current/";

  @Override
  public String getBasePath() {
    return BASE_DIR;
  }

  @Override
  public String getPath(String bpid) throws IOException {
    return CURRENT_DIR_PREFIX + bpid;
  }

  @Override
  public File getFinalizedDir(String bpid) throws IOException {
    final String finalizedPath = CURRENT_DIR_PREFIX + bpid + "/finalized";
    return new File(finalizedPath);
  }

  @Override
  public String[] getBlockPoolList() {
    // The stub owns no block pools.
    return new String[0];
  }

  @Override
  public long getAvailable() throws IOException {
    return 0;
  }
}
/** Stub volume passed to the four-argument ScanInfo constructor by these tests. */
private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
/** Block pool ID used by most of the ScanInfo test cases. */
private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491";
/** Second block pool ID, used to build paths under a different block pool directory. */
private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
/**
 * Builds a ScanInfo on TEST_VOLUME from the given files and checks that
 * its accessors hand back the same block ID, the same absolute block and
 * meta file paths (or null where a file was not supplied), and the same
 * volume.
 *
 * @param blockId   block ID to store in the ScanInfo
 * @param blockFile block file, or null if there is none
 * @param metaFile  meta file, or null if there is none
 */
void testScanInfoObject(long blockId, File blockFile, File metaFile)
    throws Exception {
  // Sanity-check the stub volume before relying on its paths.
  assertEquals("/base/current/" + BPID_1 + "/finalized",
      TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath());

  final DirectoryScanner.ScanInfo info =
      new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
  assertEquals(blockId, info.getBlockId());

  final File reportedBlock = info.getBlockFile();
  if (blockFile == null) {
    assertNull(reportedBlock);
  } else {
    assertEquals(blockFile.getAbsolutePath(),
        reportedBlock.getAbsolutePath());
  }

  final File reportedMeta = info.getMetaFile();
  if (metaFile == null) {
    assertNull(reportedMeta);
  } else {
    assertEquals(metaFile.getAbsolutePath(),
        reportedMeta.getAbsolutePath());
  }

  assertEquals(TEST_VOLUME, info.getVolume());
}
/**
 * Checks that a ScanInfo built from a block ID alone reports that ID and
 * has neither a block file nor a meta file.
 *
 * @param blockId block ID to store in the ScanInfo
 */
void testScanInfoObject(long blockId) throws Exception {
  final DirectoryScanner.ScanInfo idOnly =
      new DirectoryScanner.ScanInfo(blockId);

  assertEquals(blockId, idOnly.getBlockId());
  assertNull(idOnly.getBlockFile());
  assertNull(idOnly.getMetaFile());
}
/**
 * Runs the ScanInfo round-trip checks for each combination of present and
 * absent block and meta files, using paths under two block pools.
 */
@Test(timeout=120000)
public void TestScanInfo() throws Exception {
  final String bp1Finalized =
      TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath();
  final String bp2Finalized =
      TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath();

  // Block file and meta file both present.
  testScanInfoObject(123,
      new File(bp1Finalized, "blk_123"),
      new File(bp1Finalized, "blk_123__1001.meta"));

  // Block file only.
  testScanInfoObject(464,
      new File(bp1Finalized, "blk_123"),
      null);

  // Meta file only.
  testScanInfoObject(523,
      null,
      new File(bp1Finalized, "blk_123__1009.meta"));

  // Neither file, but still attached to the test volume.
  testScanInfoObject(789,
      null,
      null);

  // Block-ID-only constructor.
  testScanInfoObject(456);

  // Both files present, under the second block pool.
  testScanInfoObject(123,
      new File(bp2Finalized, "blk_567"),
      new File(bp2Finalized, "blk_567__1004.meta"));
}
} }