HDFS-4461. DirectoryScanner: volume prefix takes up memory for every block that is scanned (Colin Patrick McCabe)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1494401 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ab647cfcdb
commit
57006e1c8c
|
@ -290,6 +290,9 @@ Release 2.1.0-beta - UNRELEASED
|
|||
|
||||
IMPROVEMENTS
|
||||
|
||||
HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every
|
||||
block that is scanned (Colin Patrick McCabe)
|
||||
|
||||
HDFS-4222. NN is unresponsive and loses heartbeats from DNs when
|
||||
configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh)
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ import java.util.concurrent.Future;
|
|||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -154,30 +156,109 @@ public class DirectoryScanner implements Runnable {
|
|||
* Tracks the files and other information related to a block on the disk
|
||||
* Missing file is indicated by setting the corresponding member
|
||||
* to null.
|
||||
*
|
||||
* Because millions of these structures may be created, we try to save
|
||||
* memory here. So instead of storing full paths, we store path suffixes.
|
||||
* The block file, if it exists, will have a path like this:
|
||||
* <volume_base_path>/<block_path>
|
||||
* So we don't need to store the volume path, since we already know what the
|
||||
* volume is.
|
||||
*
|
||||
* The metadata file, if it exists, will have a path like this:
|
||||
* <volume_base_path>/<block_path>_<genstamp>.meta
|
||||
* So if we have a block file, there isn't any need to store the block path
|
||||
* again.
|
||||
*
|
||||
* The accessor functions take care of these manipulations.
|
||||
*/
|
||||
static class ScanInfo implements Comparable<ScanInfo> {
|
||||
private final long blockId;
|
||||
private final File metaFile;
|
||||
private final File blockFile;
|
||||
|
||||
/**
|
||||
* The block file path, relative to the volume's base directory.
|
||||
* If there was no block file found, this may be null. If 'vol'
|
||||
* is null, then this is the full path of the block file.
|
||||
*/
|
||||
private final String blockSuffix;
|
||||
|
||||
/**
|
||||
* The suffix of the meta file path relative to the block file.
|
||||
* If blockSuffix is null, then this will be the entire path relative
|
||||
* to the volume base directory, or an absolute path if vol is also
|
||||
* null.
|
||||
*/
|
||||
private final String metaSuffix;
|
||||
|
||||
private final FsVolumeSpi volume;
|
||||
|
||||
private final static Pattern CONDENSED_PATH_REGEX =
|
||||
Pattern.compile("(?<!^)(\\\\|/){2,}");
|
||||
|
||||
private final static String QUOTED_FILE_SEPARATOR =
|
||||
Matcher.quoteReplacement(File.separator);
|
||||
|
||||
/**
|
||||
* Get the most condensed version of the path.
|
||||
*
|
||||
* For example, the condensed version of /foo//bar is /foo/bar
|
||||
* Unlike {@link File#getCanonicalPath()}, this will never perform I/O
|
||||
* on the filesystem.
|
||||
*/
|
||||
private static String getCondensedPath(String path) {
|
||||
return CONDENSED_PATH_REGEX.matcher(path).
|
||||
replaceAll(QUOTED_FILE_SEPARATOR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a path suffix.
|
||||
*
|
||||
* @param f The file to get the suffix for.
|
||||
* @param prefix The prefix we're stripping off.
|
||||
*
|
||||
* @return A suffix such that prefix + suffix = path to f
|
||||
*/
|
||||
private static String getSuffix(File f, String prefix) {
|
||||
String fullPath = getCondensedPath(f.getAbsolutePath());
|
||||
if (fullPath.startsWith(prefix)) {
|
||||
return fullPath.substring(prefix.length());
|
||||
}
|
||||
throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
|
||||
}
|
||||
|
||||
ScanInfo(long blockId) {
|
||||
this(blockId, null, null, null);
|
||||
}
|
||||
|
||||
ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
|
||||
this.blockId = blockId;
|
||||
this.metaFile = metaFile;
|
||||
this.blockFile = blockFile;
|
||||
String condensedVolPath = vol == null ? null :
|
||||
getCondensedPath(vol.getBasePath());
|
||||
this.blockSuffix = blockFile == null ? null :
|
||||
getSuffix(blockFile, condensedVolPath);
|
||||
if (metaFile == null) {
|
||||
this.metaSuffix = null;
|
||||
} else if (blockFile == null) {
|
||||
this.metaSuffix = getSuffix(metaFile, condensedVolPath);
|
||||
} else {
|
||||
this.metaSuffix = getSuffix(metaFile,
|
||||
condensedVolPath + blockSuffix);
|
||||
}
|
||||
this.volume = vol;
|
||||
}
|
||||
|
||||
File getMetaFile() {
|
||||
return metaFile;
|
||||
File getBlockFile() {
|
||||
return (blockSuffix == null) ? null :
|
||||
new File(volume.getBasePath(), blockSuffix);
|
||||
}
|
||||
|
||||
File getBlockFile() {
|
||||
return blockFile;
|
||||
File getMetaFile() {
|
||||
if (metaSuffix == null) {
|
||||
return null;
|
||||
} else if (blockSuffix == null) {
|
||||
return new File(volume.getBasePath(), metaSuffix);
|
||||
} else {
|
||||
return new File(volume.getBasePath(), blockSuffix + metaSuffix);
|
||||
}
|
||||
}
|
||||
|
||||
long getBlockId() {
|
||||
|
@ -216,7 +297,8 @@ public class DirectoryScanner implements Runnable {
|
|||
}
|
||||
|
||||
public long getGenStamp() {
|
||||
return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) :
|
||||
return metaSuffix != null ? Block.getGenerationStamp(
|
||||
getMetaFile().getName()) :
|
||||
GenerationStamp.GRANDFATHER_GENERATION_STAMP;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,9 @@ public interface FsVolumeSpi {
|
|||
/** @return the available storage space in bytes. */
|
||||
public long getAvailable() throws IOException;
|
||||
|
||||
/** @return the base path to the volume */
|
||||
public String getBasePath();
|
||||
|
||||
/** @return the path to the volume */
|
||||
public String getPath(String bpid) throws IOException;
|
||||
|
||||
|
|
|
@ -124,6 +124,11 @@ class FsVolumeImpl implements FsVolumeSpi {
|
|||
return bp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBasePath() {
|
||||
return currentDir.getParent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPath(String bpid) throws IOException {
|
||||
return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath();
|
||||
|
|
|
@ -380,4 +380,93 @@ public class TestDirectoryScanner {
|
|||
assertNotNull(memBlock);
|
||||
assertEquals(genStamp, memBlock.getGenerationStamp());
|
||||
}
|
||||
|
||||
private static class TestFsVolumeSpi implements FsVolumeSpi {
|
||||
@Override
|
||||
public String[] getBlockPoolList() {
|
||||
return new String[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getAvailable() throws IOException {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBasePath() {
|
||||
return "/base";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPath(String bpid) throws IOException {
|
||||
return "/base/current/" + bpid;
|
||||
}
|
||||
|
||||
@Override
|
||||
public File getFinalizedDir(String bpid) throws IOException {
|
||||
return new File("/base/current/" + bpid + "/finalized");
|
||||
}
|
||||
}
|
||||
|
||||
private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
|
||||
|
||||
private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491";
|
||||
|
||||
private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
|
||||
|
||||
void testScanInfoObject(long blockId, File blockFile, File metaFile)
|
||||
throws Exception {
|
||||
assertEquals("/base/current/" + BPID_1 + "/finalized",
|
||||
TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath());
|
||||
DirectoryScanner.ScanInfo scanInfo =
|
||||
new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
|
||||
assertEquals(blockId, scanInfo.getBlockId());
|
||||
if (blockFile != null) {
|
||||
assertEquals(blockFile.getAbsolutePath(),
|
||||
scanInfo.getBlockFile().getAbsolutePath());
|
||||
} else {
|
||||
assertNull(scanInfo.getBlockFile());
|
||||
}
|
||||
if (metaFile != null) {
|
||||
assertEquals(metaFile.getAbsolutePath(),
|
||||
scanInfo.getMetaFile().getAbsolutePath());
|
||||
} else {
|
||||
assertNull(scanInfo.getMetaFile());
|
||||
}
|
||||
assertEquals(TEST_VOLUME, scanInfo.getVolume());
|
||||
}
|
||||
|
||||
void testScanInfoObject(long blockId) throws Exception {
|
||||
DirectoryScanner.ScanInfo scanInfo =
|
||||
new DirectoryScanner.ScanInfo(blockId);
|
||||
assertEquals(blockId, scanInfo.getBlockId());
|
||||
assertNull(scanInfo.getBlockFile());
|
||||
assertNull(scanInfo.getMetaFile());
|
||||
}
|
||||
|
||||
@Test(timeout=120000)
|
||||
public void TestScanInfo() throws Exception {
|
||||
testScanInfoObject(123,
|
||||
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||
"blk_123"),
|
||||
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||
"blk_123__1001.meta"));
|
||||
testScanInfoObject(464,
|
||||
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||
"blk_123"),
|
||||
null);
|
||||
testScanInfoObject(523,
|
||||
null,
|
||||
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||
"blk_123__1009.meta"));
|
||||
testScanInfoObject(789,
|
||||
null,
|
||||
null);
|
||||
testScanInfoObject(456);
|
||||
testScanInfoObject(123,
|
||||
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
|
||||
"blk_567"),
|
||||
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
|
||||
"blk_567__1004.meta"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue