HADOOP-6591. HarFileSystem can handle paths with whitespace characters.

(Rodrigo Schmidt via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@923619 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dhruba Borthakur 2010-03-16 08:03:38 +00:00
parent 6e86a671e0
commit 637cf89e18
2 changed files with 22 additions and 6 deletions

View File

@ -190,6 +190,9 @@ Trunk (unreleased changes)
HADOOP-6486. fix common classes to work with Avro 1.3 reflection. HADOOP-6486. fix common classes to work with Avro 1.3 reflection.
(cutting via tomwhite) (cutting via tomwhite)
HADOOP-6591. HarFileSystem can handle paths with whitespace characters.
(Rodrigo Schmidt via dhruba)
OPTIMIZATIONS OPTIMIZATIONS
HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..). HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..).

View File

@ -19,8 +19,10 @@ package org.apache.hadoop.fs;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.List; import java.util.List;
@ -45,7 +47,7 @@ import org.apache.hadoop.util.Progressable;
*/ */
public class HarFileSystem extends FilterFileSystem { public class HarFileSystem extends FilterFileSystem {
public static final int VERSION = 1; public static final int VERSION = 2;
// uri representation of this Har filesystem // uri representation of this Har filesystem
private URI uri; private URI uri;
// the version of this har filesystem // the version of this har filesystem
@ -121,7 +123,8 @@ public class HarFileSystem extends FilterFileSystem {
throw new IOException("Unable to " + throw new IOException("Unable to " +
"read the version of the Har file system: " + this.archivePath); "read the version of the Har file system: " + this.archivePath);
} }
if (this.version != HarFileSystem.VERSION) { // make it always backwards-compatible
if (this.version > HarFileSystem.VERSION) {
throw new IOException("Invalid version " + throw new IOException("Invalid version " +
this.version + " expected " + HarFileSystem.VERSION); this.version + " expected " + HarFileSystem.VERSION);
} }
@ -213,6 +216,15 @@ public class HarFileSystem extends FilterFileSystem {
return tmp; return tmp;
} }
/**
 * Decodes a file name token according to the version of this archive.
 * When the version is 2 the name is URL-decoded as UTF-8; for any other
 * version the name is handed back exactly as it was passed in.
 *
 * @param fname the file name token to decode
 * @return the URL-decoded name for version 2, otherwise {@code fname} unchanged
 * @throws UnsupportedEncodingException declared by the UTF-8 decode call
 */
private String decodeFileName(String fname)
    throws UnsupportedEncodingException {
  // only version 2 names go through URL decoding
  final boolean urlEncoded = (version == 2);
  return urlEncoded ? URLDecoder.decode(fname, "UTF-8") : fname;
}
/** /**
* return the top level archive. * return the top level archive.
*/ */
@ -479,6 +491,7 @@ public class HarFileSystem extends FilterFileSystem {
read += tmp; read += tmp;
String lineFeed = line.toString(); String lineFeed = line.toString();
String[] parsed = lineFeed.split(" "); String[] parsed = lineFeed.split(" ");
parsed[0] = decodeFileName(parsed[0]);
if (harPath.compareTo(new Path(parsed[0])) == 0) { if (harPath.compareTo(new Path(parsed[0])) == 0) {
// bingo! // bingo!
retStr = lineFeed; retStr = lineFeed;
@ -502,16 +515,16 @@ public class HarFileSystem extends FilterFileSystem {
// the format is of the form // the format is of the form
// filename "dir"/"file" partFileName startIndex length // filename "dir"/"file" partFileName startIndex length
// <space separated children> // <space separated children>
private static class HarStatus { private class HarStatus {
boolean isDir; boolean isDir;
String name; String name;
List<String> children; List<String> children;
String partName; String partName;
long startIndex; long startIndex;
long length; long length;
public HarStatus(String harString) { public HarStatus(String harString) throws UnsupportedEncodingException {
String[] splits = harString.split(" "); String[] splits = harString.split(" ");
this.name = splits[0]; this.name = decodeFileName(splits[0]);
this.isDir = "dir".equals(splits[1]) ? true: false; this.isDir = "dir".equals(splits[1]) ? true: false;
// this is equal to "none" if its a directory // this is equal to "none" if its a directory
this.partName = splits[2]; this.partName = splits[2];
@ -520,7 +533,7 @@ public class HarFileSystem extends FilterFileSystem {
if (isDir) { if (isDir) {
children = new ArrayList<String>(); children = new ArrayList<String>();
for (int i = 5; i < splits.length; i++) { for (int i = 5; i < splits.length; i++) {
children.add(splits[i]); children.add(decodeFileName(splits[i]));
} }
} }
} }