HDFS-4461. DirectoryScanner: volume prefix takes up memory for every block that is scanned (Colin Patrick McCabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1494403 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Colin McCabe 2013-06-19 00:40:22 +00:00
parent 5de952dca0
commit b5d2b0a121
5 changed files with 192 additions and 10 deletions

View File

@ -79,6 +79,9 @@ Release 2.1.0-beta - UNRELEASED
IMPROVEMENTS
HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every
block that is scanned (Colin Patrick McCabe)
HDFS-4222. NN is unresponsive and loses heartbeats from DNs when
configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh)

View File

@ -33,6 +33,8 @@ import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -154,30 +156,109 @@ public class DirectoryScanner implements Runnable {
* Tracks the files and other information related to a block on the disk.
* A missing file is indicated by setting the corresponding member
* to null.
*
* Because millions of these structures may be created, we try to save
* memory here. So instead of storing full paths, we store path suffixes.
* The block file, if it exists, will have a path like this:
* <volume_base_path>/<block_path>
* So we don't need to store the volume path, since we already know what the
* volume is.
*
* The metadata file, if it exists, will have a path like this:
* <volume_base_path>/<block_path>_<genstamp>.meta
* So if we have a block file, there isn't any need to store the block path
* again.
*
* The accessor functions take care of these manipulations.
*/
static class ScanInfo implements Comparable<ScanInfo> {
private final long blockId;
private final File metaFile;
private final File blockFile;
/**
* The block file path, relative to the volume's base directory.
* If there was no block file found, this may be null. If 'vol'
* is null, then this is the full path of the block file.
*/
private final String blockSuffix;
/**
* The suffix of the meta file path relative to the block file.
* If blockSuffix is null, then this will be the entire path relative
* to the volume base directory, or an absolute path if vol is also
* null.
*/
private final String metaSuffix;
private final FsVolumeSpi volume;
/**
* Matches a run of two or more path separator characters, where a
* separator is either a backslash or a forward slash. The negative
* lookbehind prevents a match from beginning at the very start of the
* string.
*/
private final static Pattern CONDENSED_PATH_REGEX =
Pattern.compile("(?<!^)(\\\\|/){2,}");
/**
* The platform file separator, passed through Matcher.quoteReplacement so
* that it is inserted literally when used as a regex replacement string.
*/
private final static String QUOTED_FILE_SEPARATOR =
Matcher.quoteReplacement(File.separator);
/**
 * Collapse repeated path separators in a path string.
 *
 * For example, where '/' is the file separator, /foo//bar becomes /foo/bar.
 * This is pure string manipulation: unlike {@link File#getCanonicalPath()},
 * it never performs I/O on the filesystem.
 *
 * @param path The path to condense.
 *
 * @return The path with every run matched by CONDENSED_PATH_REGEX replaced
 *         by a single File.separator.
 */
private static String getCondensedPath(String path) {
  final Matcher separatorRuns = CONDENSED_PATH_REGEX.matcher(path);
  return separatorRuns.replaceAll(QUOTED_FILE_SEPARATOR);
}
/**
 * Get a path suffix.
 *
 * The file's absolute path is condensed with getCondensedPath before the
 * prefix is compared, so the prefix should be condensed the same way.
 *
 * @param f The file to get the suffix for.
 * @param prefix The prefix we're stripping off. May be null, in which case
 *               nothing is stripped and the whole condensed absolute path
 *               of f is returned.
 *
 * @return A suffix such that prefix + suffix = path to f
 *
 * @throws RuntimeException if prefix is non-null and the condensed
 *                          absolute path of f does not start with it.
 */
private static String getSuffix(File f, String prefix) {
  String fullPath = getCondensedPath(f.getAbsolutePath());
  if (prefix == null) {
    // The ScanInfo constructor passes a null prefix when it has no volume.
    // String.startsWith(null) would throw a bare NullPointerException, so
    // treat "no prefix" as "keep the full path", which is what the
    // blockSuffix/metaSuffix field documentation describes for that case.
    return fullPath;
  }
  if (fullPath.startsWith(prefix)) {
    return fullPath.substring(prefix.length());
  }
  throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
}
/**
* Create a ScanInfo that carries only a block id. The block file, meta
* file and volume are all passed as null to the four-argument constructor.
*
* @param blockId The id of the block.
*/
ScanInfo(long blockId) {
this(blockId, null, null, null);
}
/**
* Create a ScanInfo, recording the block and meta file locations as path
* suffixes rather than full paths.
*
* @param blockId The id of the block.
* @param blockFile The block file, or null if there is none.
* @param metaFile The meta file, or null if there is none.
* @param vol The volume the files are on. Its base path is the prefix that
* is stripped from the stored paths.
*/
ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
this.blockId = blockId;
// NOTE(review): the File references are kept here in addition to the
// suffix strings computed below - confirm whether both are still needed.
this.metaFile = metaFile;
this.blockFile = blockFile;
// Condense the base path so it is compared against file paths that
// getSuffix has condensed in the same way.
// NOTE(review): when vol is null this prefix is null, and the last branch
// below concatenates it into a string - confirm callers never pass a
// file together with a null volume.
String condensedVolPath = vol == null ? null :
getCondensedPath(vol.getBasePath());
this.blockSuffix = blockFile == null ? null :
getSuffix(blockFile, condensedVolPath);
if (metaFile == null) {
this.metaSuffix = null;
} else if (blockFile == null) {
// No block file to share a prefix with: store the meta path relative to
// the volume base path.
this.metaSuffix = getSuffix(metaFile, condensedVolPath);
} else {
// The meta path begins with the block file path, so only the part after
// the block file path is stored.
this.metaSuffix = getSuffix(metaFile,
condensedVolPath + blockSuffix);
}
this.volume = vol;
}
File getMetaFile() {
return metaFile;
/**
 * Rebuild the block file from the volume base path and the stored suffix.
 *
 * @return The block file, or null if no block file suffix was recorded.
 */
File getBlockFile() {
  if (blockSuffix == null) {
    return null;
  }
  return new File(volume.getBasePath(), blockSuffix);
}
File getBlockFile() {
return blockFile;
/**
 * Rebuild the meta file from the volume base path and the stored suffixes.
 *
 * When a block file suffix is present, the meta suffix is appended to it;
 * otherwise the meta suffix alone is resolved against the base path.
 *
 * @return The meta file, or null if no meta file suffix was recorded.
 */
File getMetaFile() {
  if (metaSuffix == null) {
    return null;
  }
  final String relativePath =
      (blockSuffix == null) ? metaSuffix : blockSuffix + metaSuffix;
  return new File(volume.getBasePath(), relativePath);
}
long getBlockId() {
@ -216,8 +297,9 @@ public class DirectoryScanner implements Runnable {
}
/**
* Get the generation stamp recorded in the meta file's name.
*
* @return The value Block.getGenerationStamp extracts from the meta file
* name, or GenerationStamp.GRANDFATHER_GENERATION_STAMP when there is no
* meta file.
*/
public long getGenStamp() {
return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) :
GenerationStamp.GRANDFATHER_GENERATION_STAMP;
return metaSuffix != null ? Block.getGenerationStamp(
getMetaFile().getName()) :
GenerationStamp.GRANDFATHER_GENERATION_STAMP;
}
}

View File

@ -30,6 +30,9 @@ public interface FsVolumeSpi {
/** @return the available storage space in bytes. */
public long getAvailable() throws IOException;
/**
* @return the base path to the volume.
* NOTE(review): FsVolumeImpl returns the parent of its current directory;
* confirm that other implementations follow the same convention.
*/
public String getBasePath();
/**
* @param bpid the block pool id
* @return the path on this volume for the given block pool
* @throws IOException declared by the signature; the failure conditions
* are not visible here
*/
public String getPath(String bpid) throws IOException;

View File

@ -124,6 +124,11 @@ class FsVolumeImpl implements FsVolumeSpi {
return bp;
}
/**
 * Report the volume base path as the parent of currentDir.
 *
 * @return the parent path of currentDir
 */
@Override
public String getBasePath() {
  final String parentOfCurrentDir = currentDir.getParent();
  return parentOfCurrentDir;
}
@Override
public String getPath(String bpid) throws IOException {
return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath();

View File

@ -380,4 +380,93 @@ public class TestDirectoryScanner {
assertNotNull(memBlock);
assertEquals(genStamp, memBlock.getGenerationStamp());
}
/**
 * Stub volume rooted at "/base" that answers every query with a fixed
 * value and never touches the filesystem.
 */
private static class TestFsVolumeSpi implements FsVolumeSpi {
  /** @return an empty block pool list */
  @Override
  public String[] getBlockPoolList() {
    return new String[] {};
  }

  /** @return zero available bytes */
  @Override
  public long getAvailable() throws IOException {
    return 0L;
  }

  /** @return the fixed base path of this stub volume */
  @Override
  public String getBasePath() {
    return "/base";
  }

  /** @return the block pool path under the base path's current directory */
  @Override
  public String getPath(String bpid) throws IOException {
    final String blockPoolPath = "/base/current/" + bpid;
    return blockPoolPath;
  }

  /** @return the finalized directory of the given block pool */
  @Override
  public File getFinalizedDir(String bpid) throws IOException {
    final String finalizedPath = "/base/current/" + bpid + "/finalized";
    return new File(finalizedPath);
  }
}
/** Shared stub volume used by the ScanInfo tests. */
private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
/** First sample block pool id used to build test paths. */
private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491";
/** Second sample block pool id used to build test paths. */
private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
/**
 * Build a ScanInfo on TEST_VOLUME and check that its accessors give back
 * the block id, the block file path, the meta file path and the volume it
 * was constructed with.
 *
 * @param blockId the block id to store
 * @param blockFile the block file, or null to check the "no block file" case
 * @param metaFile the meta file, or null to check the "no meta file" case
 */
void testScanInfoObject(long blockId, File blockFile, File metaFile)
    throws Exception {
  // Sanity-check the stub volume before relying on its paths.
  final String actualFinalizedDir =
      TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath();
  assertEquals("/base/current/" + BPID_1 + "/finalized", actualFinalizedDir);

  final DirectoryScanner.ScanInfo info =
      new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
  assertEquals(blockId, info.getBlockId());

  if (blockFile == null) {
    assertNull(info.getBlockFile());
  } else {
    final String expectedBlockPath = blockFile.getAbsolutePath();
    assertEquals(expectedBlockPath, info.getBlockFile().getAbsolutePath());
  }

  if (metaFile == null) {
    assertNull(info.getMetaFile());
  } else {
    final String expectedMetaPath = metaFile.getAbsolutePath();
    assertEquals(expectedMetaPath, info.getMetaFile().getAbsolutePath());
  }

  assertEquals(TEST_VOLUME, info.getVolume());
}
/**
 * Build a ScanInfo from a block id alone and check that it reports that
 * id and has neither a block file nor a meta file.
 *
 * @param blockId the block id to store
 */
void testScanInfoObject(long blockId) throws Exception {
  final DirectoryScanner.ScanInfo idOnly = new DirectoryScanner.ScanInfo(blockId);
  assertEquals(blockId, idOnly.getBlockId());
  assertNull(idOnly.getBlockFile());
  assertNull(idOnly.getMetaFile());
}
/**
 * Exercise ScanInfo with every combination of present and absent block and
 * meta files, with the id-only constructor, and with a second block pool.
 */
@Test(timeout=120000)
public void TestScanInfo() throws Exception {
  final String firstPoolDir =
      TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath();
  final String secondPoolDir =
      TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath();

  // Block file and meta file both present.
  testScanInfoObject(123,
      new File(firstPoolDir, "blk_123"),
      new File(firstPoolDir, "blk_123__1001.meta"));

  // Block file only.
  testScanInfoObject(464,
      new File(firstPoolDir, "blk_123"),
      null);

  // Meta file only.
  testScanInfoObject(523,
      null,
      new File(firstPoolDir, "blk_123__1009.meta"));

  // Neither file.
  testScanInfoObject(789,
      null,
      null);

  // Id-only constructor.
  testScanInfoObject(456);

  // Both files present, in the second block pool.
  testScanInfoObject(123,
      new File(secondPoolDir, "blk_567"),
      new File(secondPoolDir, "blk_567__1004.meta"));
}
}