HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..). Contributed by mahadev
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@915168 13f79535-47bb-0310-9956-ffa450edef68
commit 4eedc77275
parent c5622e5d4d
@@ -163,6 +163,9 @@ Trunk (unreleased changes)
 
   OPTIMIZATIONS
 
+    HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..).
+    (mahadev via szetszwo)
+
   BUG FIXES
 
     HADOOP-6293. Fix FsShell -text to work on filesystems other than the
@@ -325,25 +325,12 @@ public Path makeQualified(Path path) {
   @Override
   public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
       long len) throws IOException {
-    // need to look up the file in the underlying fs
-    // look up the index
-
-    // make sure this is a prt of this har filesystem
-    Path p = makeQualified(file.getPath());
-    Path harPath = getPathInHar(p);
-    String line = fileStatusInIndex(harPath);
-    if (line == null) {
-      throw new FileNotFoundException("File " + file.getPath() + " not found");
-    }
-    HarStatus harStatus = new HarStatus(line);
-    if (harStatus.isDir()) {
-      return new BlockLocation[0];
-    }
-    FileStatus fsFile = fs.getFileStatus(new Path(archivePath,
-        harStatus.getPartName()));
-    BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile,
-        harStatus.getStartIndex() + start, len);
-    return fakeBlockLocations(rawBlocks, harStatus.getStartIndex());
+    // just fake block locations
+    // its fast and simpler
+    // doing various block location manipulation
+    // with part files adds a lot of overhead because
+    // of the look ups of filestatus in index files
+    return new BlockLocation[]{ new BlockLocation() };
   }
 
   /**
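Note (not part of the commit): the rewritten getFileBlockLocations no longer maps the request onto the underlying part file; it returns a single placeholder BlockLocation, trading block locality detail for speed, as the new comments explain. A minimal caller-side sketch of the observable effect, assuming a hypothetical archive path (the har URI and file name are made up for illustration):

    import java.net.URI;
    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HarBlockLocationProbe {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical archive; substitute a real .har created with the archive tool.
        FileSystem harFs = FileSystem.get(new URI("har:///user/foo/data.har"), conf);
        FileStatus st = harFs.getFileStatus(new Path("har:///user/foo/data.har/somefile"));
        BlockLocation[] locs = harFs.getFileBlockLocations(st, 0, st.getLen());
        // After this change: a single placeholder location with empty host/name lists.
        System.out.println(locs.length + " location(s), hosts: "
            + Arrays.toString(locs[0].getHosts()));
      }
    }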
@@ -387,6 +374,63 @@ public Store(long begin, long end, int startHash, int endHash) {
     public int endHash;
   }
 
+  /**
+   * Get filestatuses of all the children of a given directory. This just reads
+   * through index file and reads line by line to get all statuses for children
+   * of a directory. Its a brute force way of getting all such filestatuses
+   *
+   * @param parent
+   *          the parent path directory
+   * @param statuses
+   *          the list to add the children filestatuses to
+   * @param children
+   *          the string list of children for this parent
+   * @param archiveIndexStat
+   *          the archive index filestatus
+   */
+  private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
+      List<String> children, FileStatus archiveIndexStat) throws IOException {
+    // read the index file
+    FSDataInputStream aIn = null;
+    try {
+      aIn = fs.open(archiveIndex);
+      LineReader aLin;
+      long read = 0;
+      aLin = new LineReader(aIn, getConf());
+      String parentString = parent.getName();
+      Path harPath = new Path(parentString);
+      int harlen = harPath.depth();
+      Text line = new Text();
+      while (read < archiveIndexStat.getLen()) {
+        int tmp = aLin.readLine(line);
+        read += tmp;
+        String lineFeed = line.toString();
+        String child = lineFeed.substring(0, lineFeed.indexOf(" "));
+        if ((child.startsWith(parentString))) {
+          Path thisPath = new Path(child);
+          if (thisPath.depth() == harlen + 1) {
+            // bingo!
+            HarStatus hstatus = new HarStatus(lineFeed);
+            FileStatus childStatus = new FileStatus(hstatus.isDir() ? 0
+                : hstatus.getLength(), hstatus.isDir(), (int) archiveIndexStat
+                .getReplication(), archiveIndexStat.getBlockSize(),
+                archiveIndexStat.getModificationTime(), archiveIndexStat
+                .getAccessTime(), new FsPermission(archiveIndexStat
+                .getPermission()), archiveIndexStat.getOwner(),
+                archiveIndexStat.getGroup(), makeRelative(this.uri.toString(),
+                    new Path(hstatus.name)));
+            statuses.add(childStatus);
+          }
+          line.clear();
+        }
+      }
+    } finally {
+      if (aIn != null) {
+        aIn.close();
+      }
+    }
+  }
+
   // make sure that this harPath is relative to the har filesystem
   // this only works for relative paths. This returns the line matching
   // the file in the index. Returns a null if there is not matching
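Note (not part of the commit): the new fileStatusesInIndex collects every direct child of a directory in one pass over the archive index, instead of resolving each child through a separate lookup. The direct-child test it applies to each index line is a prefix match on the parent name plus a path-depth difference of exactly one. A small standalone sketch of that test (the class name and sample paths are hypothetical; index entries are assumed to use absolute paths such as "/dir/file1"):

    import org.apache.hadoop.fs.Path;

    public class HarDirectChildCheck {
      // Mirrors the check in fileStatusesInIndex: prefix match + depth == parent depth + 1.
      static boolean isDirectChild(String parent, String candidate) {
        return candidate.startsWith(parent)
            && new Path(candidate).depth() == new Path(parent).depth() + 1;
      }

      public static void main(String[] args) {
        System.out.println(isDirectChild("/dir", "/dir/file1"));     // true: one level below
        System.out.println(isDirectChild("/dir", "/dir/sub/file2")); // false: a grandchild
      }
    }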
@@ -650,10 +694,8 @@ public FileStatus[] listStatus(Path f) throws IOException {
             archiveStatus.getOwner(), archiveStatus.getGroup(),
             makeRelative(this.uri.toString(), new Path(hstatus.name))));
       else
-        for (String child: hstatus.children) {
-          FileStatus tmp = getFileStatus(new Path(tmpPath, child));
-          statuses.add(tmp);
-        }
+        fileStatusesInIndex(hstatus, statuses, hstatus.children, archiveStatus);
     return statuses.toArray(new FileStatus[statuses.size()]);
   }
 
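Note (not part of the commit): before this change, listStatus on a HAR directory called getFileStatus once per child, and each of those calls opened and searched the archive index for the matching entry, so listing a directory with k children cost roughly k separate index lookups. With fileStatusesInIndex, the whole child list is built from a single pass over the index, which is where the listStatus(..) speedup in the commit title comes from.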