HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..). Contributed by mahadev

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@915168 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2010-02-23 03:54:14 +00:00
parent c5622e5d4d
commit 4eedc77275
2 changed files with 68 additions and 23 deletions

View File

@ -163,6 +163,9 @@ Trunk (unreleased changes)
OPTIMIZATIONS OPTIMIZATIONS
HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..).
(mahadev via szetszwo)
BUG FIXES BUG FIXES
HADOOP-6293. Fix FsShell -text to work on filesystems other than the HADOOP-6293. Fix FsShell -text to work on filesystems other than the

View File

@ -325,25 +325,12 @@ public Path makeQualified(Path path) {
@Override @Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
long len) throws IOException { long len) throws IOException {
// need to look up the file in the underlying fs // just fake block locations
// look up the index // its fast and simpler
// doing various block location manipulation
// make sure this is a prt of this har filesystem // with part files adds a lot of overhead because
Path p = makeQualified(file.getPath()); // of the look ups of filestatus in index files
Path harPath = getPathInHar(p); return new BlockLocation[]{ new BlockLocation() };
String line = fileStatusInIndex(harPath);
if (line == null) {
throw new FileNotFoundException("File " + file.getPath() + " not found");
}
HarStatus harStatus = new HarStatus(line);
if (harStatus.isDir()) {
return new BlockLocation[0];
}
FileStatus fsFile = fs.getFileStatus(new Path(archivePath,
harStatus.getPartName()));
BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile,
harStatus.getStartIndex() + start, len);
return fakeBlockLocations(rawBlocks, harStatus.getStartIndex());
} }
/** /**
@ -387,6 +374,63 @@ public Store(long begin, long end, int startHash, int endHash) {
public int endHash; public int endHash;
} }
/**
 * Get filestatuses of all the immediate children of a given directory. This
 * opens the archive index file and scans it line by line, adding a
 * {@link FileStatus} for every entry whose path lies directly below the
 * parent (exactly one path component deeper). It is a brute force scan of
 * the index.
 *
 * Every attribute of the produced statuses other than length, directory flag
 * and path is copied from <code>archiveIndexStat</code>.
 *
 * @param parent
 *          the parent path directory
 * @param statuses
 *          the list to add the children filestatuses to
 * @param children
 *          the string list of children for this parent; not consulted by
 *          this method, kept so existing callers stay unchanged
 * @param archiveIndexStat
 *          the archive index filestatus; its length bounds how much of the
 *          index is read
 * @throws IOException
 *           if the index file cannot be opened or read
 */
private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
    List<String> children, FileStatus archiveIndexStat) throws IOException {
  String parentString = parent.getName();
  int harlen = new Path(parentString).depth();
  // Match on "<parent>/" rather than "<parent>": a bare prefix test would
  // treat "/ab/c" as a child of "/a", since it starts with "/a" and is
  // exactly one level deeper than "/a".
  String childPrefix = parentString.endsWith(Path.SEPARATOR)
      ? parentString : parentString + Path.SEPARATOR;

  // read the index file
  FSDataInputStream aIn = null;
  try {
    aIn = fs.open(archiveIndex);
    LineReader aLin = new LineReader(aIn, getConf());
    Text line = new Text();
    long indexLen = archiveIndexStat.getLen();
    long read = 0;
    while (read < indexLen) {
      // reset the buffer on every iteration, not only after a match
      line.clear();
      int tmp = aLin.readLine(line);
      if (tmp <= 0) {
        // nothing was consumed: stop instead of spinning forever when the
        // index is shorter than the reported length
        break;
      }
      read += tmp;
      String lineFeed = line.toString();
      int nameEnd = lineFeed.indexOf(" ");
      if (nameEnd < 0) {
        // blank or malformed line without a name field; skip it
        continue;
      }
      String child = lineFeed.substring(0, nameEnd);
      if (!child.startsWith(childPrefix)) {
        continue;
      }
      Path thisPath = new Path(child);
      if (thisPath.depth() == harlen + 1) {
        // bingo! a direct child of the parent
        HarStatus hstatus = new HarStatus(lineFeed);
        FileStatus childStatus = new FileStatus(
            hstatus.isDir() ? 0 : hstatus.getLength(),
            hstatus.isDir(),
            (int) archiveIndexStat.getReplication(),
            archiveIndexStat.getBlockSize(),
            archiveIndexStat.getModificationTime(),
            archiveIndexStat.getAccessTime(),
            new FsPermission(archiveIndexStat.getPermission()),
            archiveIndexStat.getOwner(),
            archiveIndexStat.getGroup(),
            makeRelative(this.uri.toString(), new Path(hstatus.name)));
        statuses.add(childStatus);
      }
    }
  } finally {
    if (aIn != null) {
      aIn.close();
    }
  }
}
// make sure that this harPath is relative to the har filesystem // make sure that this harPath is relative to the har filesystem
// this only works for relative paths. This returns the line matching // this only works for relative paths. This returns the line matching
// the file in the index. Returns a null if there is not matching // the file in the index. Returns a null if there is not matching
@ -650,10 +694,8 @@ public FileStatus[] listStatus(Path f) throws IOException {
archiveStatus.getOwner(), archiveStatus.getGroup(), archiveStatus.getOwner(), archiveStatus.getGroup(),
makeRelative(this.uri.toString(), new Path(hstatus.name)))); makeRelative(this.uri.toString(), new Path(hstatus.name))));
else else
for (String child: hstatus.children) { fileStatusesInIndex(hstatus, statuses, hstatus.children, archiveStatus);
FileStatus tmp = getFileStatus(new Path(tmpPath, child));
statuses.add(tmp);
}
return statuses.toArray(new FileStatus[statuses.size()]); return statuses.toArray(new FileStatus[statuses.size()]);
} }