diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index b3a94f72f0f..37e78fc4ffe 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -132,6 +132,9 @@ Release 2.1.2 - UNRELEASED
HADOOP-9761. ViewFileSystem#rename fails when using DistributedFileSystem.
(Andrew Wang via Colin Patrick McCabe)
+ HADOOP-10003. HarFileSystem.listLocatedStatus() fails.
+ (Jason Dere and suresh via suresh)
+
Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index 5d243356100..b72c7ee8055 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -458,6 +458,10 @@
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c</exclude>
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc_encoder.h</exclude>
<exclude>src/test/java/org/apache/hadoop/fs/test-untar.tgz</exclude>
+ <exclude>src/test/resources/test.har/_SUCCESS</exclude>
+ <exclude>src/test/resources/test.har/_index</exclude>
+ <exclude>src/test/resources/test.har/_masterindex</exclude>
+ <exclude>src/test/resources/test.har/part-0</exclude>
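(The enclosing pom.xml element is not visible in this hunk, but judging from the existing entries these <exclude> lines presumably belong to the Apache RAT license-audit configuration; the new test.har fixtures are archive data and metadata files that cannot carry Apache license headers, just like the binary test-untar.tgz above.)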
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
index 6f1dab0f8ac..459a574296f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
@@ -17,20 +17,6 @@
*/
package org.apache.hadoop.fs;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URLDecoder;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.HashMap;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -40,6 +26,14 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Progressable;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.util.*;
+
/**
* This is an implementation of the Hadoop Archive
* Filesystem. This archive Filesystem has index files
@@ -53,7 +47,7 @@ import org.apache.hadoop.util.Progressable;
* index for ranges of hashcodes.
*/
-public class HarFileSystem extends FilterFileSystem {
+public class HarFileSystem extends FileSystem {
private static final Log LOG = LogFactory.getLog(HarFileSystem.class);
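The change of superclass above is the heart of the fix. FilterFileSystem forwards listLocatedStatus() wholesale to the wrapped filesystem, handing it a har:// path the underlying client cannot resolve, whereas the generic FileSystem implementation is built on the subclass's own listStatus() and getFileBlockLocations(), both of which HarFileSystem overrides. A rough sketch of the two inherited behaviors, paraphrased from the Hadoop 2.x sources rather than quoted:

    // Inherited from FilterFileSystem (old behavior): delegate wholesale,
    // so the underlying local/HDFS client receives a har:// path and fails.
    public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
        throws IOException {
      return fs.listLocatedStatus(f);
    }

    // Inherited from FileSystem (new behavior): roughly equivalent to
    // iterating this.listStatus(f) and attaching block locations via
    // this.getFileBlockLocations(status, 0, status.getLen()), so the
    // har index lookup now happens inside HarFileSystem itself.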
@@ -75,11 +69,13 @@ public class HarFileSystem extends FilterFileSystem {
// pointer into the static metadata cache
private HarMetaData metadata;
+ private FileSystem fs;
+
/**
* public construction of harfilesystem
- *
*/
public HarFileSystem() {
+ // Must call #initialize() method to set the underlying file system
}
/**
@@ -96,10 +92,11 @@ public class HarFileSystem extends FilterFileSystem {
/**
* Constructor to create a HarFileSystem with an
* underlying filesystem.
- * @param fs
+ * @param fs underlying file system
*/
public HarFileSystem(FileSystem fs) {
- super(fs);
+ this.fs = fs;
+ this.statistics = fs.statistics;
}
private synchronized void initializeMetadataCache(Configuration conf) {
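With the no-arg constructor, the underlying filesystem stays unset until initialize() runs, as the new comment warns. A minimal usage sketch, mirroring testListLocatedStatus() below (the archive path is hypothetical):

    // Assumes the same org.apache.hadoop imports HarFileSystem itself uses.
    void listArchivedDir() throws Exception {
      Configuration conf = new Configuration();
      FileSystem local = FileSystem.getLocal(conf);
      HarFileSystem hfs = new HarFileSystem(local);
      // same URI construction as the test: "har://" + absolute local path
      hfs.initialize(new URI("har:///tmp/archive.har"), conf);
      for (FileStatus stat : hfs.listStatus(new Path("dir1"))) {
        System.out.println(stat.getPath());
      }
    }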
@@ -171,6 +168,11 @@ public class HarFileSystem extends FilterFileSystem {
}
}
+ @Override
+ public Configuration getConf() {
+ return fs.getConf();
+ }
+
// get the version of the filesystem from the masterindex file
// the version is currently not useful since its the first version
// of archives
@@ -236,8 +238,7 @@ public class HarFileSystem extends FilterFileSystem {
throw new IOException("query component in Path not supported " + rawURI);
}
- URI tmp = null;
-
+ URI tmp;
try {
// convert - to ://
URI baseUri = new URI(authority.replaceFirst("-", "://"));
@@ -256,7 +257,7 @@ public class HarFileSystem extends FilterFileSystem {
return URLDecoder.decode(str, "UTF-8");
}
- private String decodeFileName(String fname)
+ private String decodeFileName(String fname)
throws UnsupportedEncodingException {
int version = metadata.getVersion();
if (version == 2 || version == 3){
@@ -276,7 +277,7 @@ public class HarFileSystem extends FilterFileSystem {
/**
* Create a har specific auth
* har-underlyingfs:port
- * @param underLyingURI the uri of underlying
+ * @param underLyingUri the uri of underlying
* filesystem
* @return har specific auth
*/
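To make the corrected @param concrete: for an archive stored on hdfs://namenode:8020, the har-specific authority is hdfs-namenode:8020 (scheme and host joined by "-"), and decodeHarURI() reverses the substitution with the replaceFirst call seen earlier in this patch. An illustrative round trip, with a made-up host and port:

    // har://hdfs-namenode:8020/user/foo.har/dir1/1.txt names a file inside
    // the archive /user/foo.har stored on hdfs://namenode:8020
    String authority = "hdfs-namenode:8020";
    URI baseUri = new URI(authority.replaceFirst("-", "://")); // hdfs://namenode:8020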
@@ -294,7 +295,12 @@ public class HarFileSystem extends FilterFileSystem {
}
return auth;
}
-
+
+ @Override
+ protected URI getCanonicalUri() {
+ return fs.canonicalizeUri(getUri());
+ }
+
/**
* Returns the uri of this filesystem.
* The uri is of the form
@@ -419,7 +425,7 @@ public class HarFileSystem extends FilterFileSystem {
/**
* Get block locations from the underlying fs and fix their
* offsets and lengths.
- * @param file the input filestatus to get block locations
+ * @param file the input file status to get block locations
* @param start the start of the desired range in the contained file
* @param len the length of the desired range
* @return block locations for this segment of file
@@ -441,8 +447,7 @@ public class HarFileSystem extends FilterFileSystem {
}
/**
- * the hash of the path p inside iniside
- * the filesystem
+ * the hash of the path p inside the filesystem
* @param p the path in the harfilesystem
* @return the hash code of the path.
*/
@@ -475,13 +480,9 @@ public class HarFileSystem extends FilterFileSystem {
* the parent path directory
* @param statuses
* the list to add the children filestatuses to
- * @param children
- * the string list of children for this parent
- * @param archiveIndexStat
- * the archive index filestatus
*/
- private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
- List<String> children) throws IOException {
+ private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses)
+ throws IOException {
String parentString = parent.getName();
if (!parentString.endsWith(Path.SEPARATOR)){
parentString += Path.SEPARATOR;
@@ -547,7 +548,7 @@ public class HarFileSystem extends FilterFileSystem {
// stored in a single line in the index files
// the format is of the form
// filename "dir"/"file" partFileName startIndex length
- //
+ //
private class HarStatus {
boolean isDir;
String name;
@@ -666,7 +667,6 @@ public class HarFileSystem extends FilterFileSystem {
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
// get the fs DataInputStream for the underlying file
HarStatus hstatus = getFileHarStatus(f);
- // we got it.. woo hooo!!!
if (hstatus.isDir()) {
throw new FileNotFoundException(f + " : not a file in " +
archivePath);
@@ -686,7 +686,12 @@ public class HarFileSystem extends FilterFileSystem {
Progressable progress) throws IOException {
throw new IOException("Har: create not allowed.");
}
-
+
+ @Override
+ public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
+ throw new IOException("Har: append not allowed.");
+ }
+
@Override
public void close() throws IOException {
if (fs != null) {
@@ -704,9 +709,19 @@ public class HarFileSystem extends FilterFileSystem {
*/
@Override
public boolean setReplication(Path src, short replication) throws IOException{
- throw new IOException("Har: setreplication not allowed");
+ throw new IOException("Har: setReplication not allowed");
}
-
+
+ @Override
+ public boolean rename(Path src, Path dst) throws IOException {
+ throw new IOException("Har: rename not allowed");
+ }
+
+ @Override
+ public FSDataOutputStream append(Path f) throws IOException {
+ throw new IOException("Har: append not allowed");
+ }
+
/**
* Not implemented.
*/
@@ -714,7 +729,7 @@ public class HarFileSystem extends FilterFileSystem {
public boolean delete(Path f, boolean recursive) throws IOException {
throw new IOException("Har: delete not allowed");
}
-
+
/**
* liststatus returns the children of a directory
* after looking up the index files.
@@ -733,7 +748,7 @@ public class HarFileSystem extends FilterFileSystem {
throw new FileNotFoundException("File " + f + " not found in " + archivePath);
}
if (hstatus.isDir()) {
- fileStatusesInIndex(hstatus, statuses, hstatus.children);
+ fileStatusesInIndex(hstatus, statuses);
} else {
statuses.add(toFileStatus(hstatus, null));
}
@@ -748,7 +763,7 @@ public class HarFileSystem extends FilterFileSystem {
public Path getHomeDirectory() {
return new Path(uri.toString());
}
-
+
@Override
public void setWorkingDirectory(Path newDir) {
//does nothing.
@@ -811,7 +826,7 @@ public class HarFileSystem extends FilterFileSystem {
* Not implemented.
*/
@Override
- public void setPermission(Path p, FsPermission permisssion)
+ public void setPermission(Path p, FsPermission permission)
throws IOException {
throw new IOException("Har: setPermission not allowed");
}
@@ -900,7 +915,7 @@ public class HarFileSystem extends FilterFileSystem {
newlen = (int) (end - position);
}
// end case
- if (newlen == 0)
+ if (newlen == 0)
return ret;
ret = underLyingStream.read(b, offset, newlen);
position += ret;
@@ -937,8 +952,8 @@ public class HarFileSystem extends FilterFileSystem {
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
- //do not need to implement this
- // hdfs in itself does seektonewsource
+ // do not need to implement this
+ // HDFS itself does seekToNewSource
// while reading.
return false;
}
@@ -974,14 +989,12 @@ public class HarFileSystem extends FilterFileSystem {
}
@Override
- public void setReadahead(Long readahead)
- throws IOException, UnsupportedEncodingException {
+ public void setReadahead(Long readahead) throws IOException {
underLyingStream.setReadahead(readahead);
}
@Override
- public void setDropBehind(Boolean dropBehind)
- throws IOException, UnsupportedEncodingException {
+ public void setDropBehind(Boolean dropBehind) throws IOException {
underLyingStream.setDropBehind(dropBehind);
}
}
@@ -999,19 +1012,6 @@ public class HarFileSystem extends FilterFileSystem {
long length, int bufsize) throws IOException {
super(new HarFsInputStream(fs, p, start, length, bufsize));
}
-
- /**
- * constructor for har input stream.
- * @param fs the underlying filesystem
- * @param p the path in the underlying file system
- * @param start the start position in the part file
- * @param length the length of valid data in the part file.
- * @throws IOException
- */
- public HarFSDataInputStream(FileSystem fs, Path p, long start, long length)
- throws IOException {
- super(new HarFsInputStream(fs, p, start, length, 0));
- }
}
private class HarMetaData {
@@ -1058,7 +1058,7 @@ public class HarFileSystem extends FilterFileSystem {
}
private void parseMetaData() throws IOException {
- Text line;
+ Text line = new Text();
long read;
FSDataInputStream in = null;
LineReader lin = null;
@@ -1068,7 +1068,6 @@ public class HarFileSystem extends FilterFileSystem {
FileStatus masterStat = fs.getFileStatus(masterIndexPath);
masterIndexTimestamp = masterStat.getModificationTime();
lin = new LineReader(in, getConf());
- line = new Text();
read = lin.readLine(line);
// the first line contains the version of the index file
@@ -1082,7 +1081,7 @@ public class HarFileSystem extends FilterFileSystem {
}
// each line contains a hashcode range and the index file name
- String[] readStr = null;
+ String[] readStr;
while(read < masterStat.getLen()) {
int b = lin.readLine(line);
read += b;
@@ -1094,6 +1093,9 @@ public class HarFileSystem extends FilterFileSystem {
endHash));
line.clear();
}
+ } catch (IOException ioe) {
+ LOG.warn("Encountered exception ", ioe);
+ throw ioe;
} finally {
IOUtils.cleanup(LOG, lin, in);
}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
index 424257496ef..237d7161f72 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
@@ -18,14 +18,6 @@
package org.apache.hadoop.fs;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertFalse;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Shell;
@@ -34,6 +26,14 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.*;
+
/**
* This test class checks basic operations with {@link HarFileSystem} including
* various initialization cases, getters, and modification methods.
@@ -69,7 +69,7 @@ public class TestHarFileSystemBasics {
/*
* creates and returns fully initialized HarFileSystem
*/
- private HarFileSystem createHarFileSysten(final Configuration conf)
+ private HarFileSystem createHarFileSystem(final Configuration conf)
throws Exception {
localFileSystem = FileSystem.getLocal(conf);
localFileSystem.initialize(new URI("file:///"), conf);
@@ -130,7 +130,7 @@ public class TestHarFileSystemBasics {
}
// create Har to test:
conf = new Configuration();
- harFileSystem = createHarFileSysten(conf);
+ harFileSystem = createHarFileSystem(conf);
}
@After
@@ -232,6 +232,32 @@ public class TestHarFileSystemBasics {
assertTrue(p2.toUri().toString().startsWith("har://file-localhost/"));
}
+ @Test
+ public void testListLocatedStatus() throws Exception {
+ String testHarPath = this.getClass().getResource("/test.har").getPath();
+ URI uri = new URI("har://" + testHarPath);
+ HarFileSystem hfs = new HarFileSystem(localFileSystem);
+ hfs.initialize(uri, new Configuration());
+
+ // test.har has the following contents:
+ // dir1/1.txt
+ // dir1/2.txt
+ Set<String> expectedFileNames = new HashSet<String>();
+ expectedFileNames.add("1.txt");
+ expectedFileNames.add("2.txt");
+
+ // List contents of dir, and ensure we find all expected files
+ Path path = new Path("dir1");
+ RemoteIterator<LocatedFileStatus> fileList = hfs.listLocatedStatus(path);
+ while (fileList.hasNext()) {
+ String fileName = fileList.next().getPath().getName();
+ assertTrue(fileName + " not in expected files list", expectedFileNames.contains(fileName));
+ expectedFileNames.remove(fileName);
+ }
+ assertEquals("Didn't find all of the expected file names: " + expectedFileNames,
+ 0, expectedFileNames.size());
+ }
+
// ========== Negative:
@Test
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/.part-0.crc b/hadoop-common-project/hadoop-common/src/test/resources/test.har/.part-0.crc
new file mode 100644
index 00000000000..3b7b044936a
Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/resources/test.har/.part-0.crc differ
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/_SUCCESS b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_SUCCESS
new file mode 100755
index 00000000000..e69de29bb2d
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/_index b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_index
new file mode 100755
index 00000000000..0360e5beabc
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_index
@@ -0,0 +1,4 @@
+%2F dir 1380270822000+511+root+wheel 0 0 dir1
+%2Fdir1 dir 1380270441000+493+jdere+wheel 0 0 1.txt 2.txt
+%2Fdir1%2F1.txt file part-0 0 0 1380270439000+420+jdere+wheel
+%2Fdir1%2F2.txt file part-0 0 0 1380270441000+420+jdere+wheel
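For readers new to the HAR layout (summarized from the format comment near HarStatus above): each _index line starts with a URL-encoded path and a dir/file flag; directory entries then carry their properties and child names, while file entries name the part file, start offset, and length before the properties, which appear to be modificationTime+permissions+owner+group. Decoding the first file entry with the same UTF-8 scheme decodeFileName() uses:

    // "%2Fdir1%2F1.txt file part-0 0 0 1380270439000+420+jdere+wheel"
    String name = URLDecoder.decode("%2Fdir1%2F1.txt", "UTF-8"); // "/dir1/1.txt"
    // stored in part-0 at offset 0, length 0 (the fixture files are empty);
    // 420 decimal is 0644 octal, i.e. rw-r--r--, owner jdere, group wheel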
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/_masterindex b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_masterindex
new file mode 100755
index 00000000000..c028f4e65a7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_masterindex
@@ -0,0 +1,2 @@
+3
+0 1210114968 0 232
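The first _masterindex line is the archive format version (3, matching the version == 2 || version == 3 check in decodeFileName()); each subsequent line appears to map a hash range to a byte span of _index, in the startHash endHash begin end order consumed by the Store records built in parseMetaData():

    // "0 1210114968 0 232": paths whose getHarHash() falls in
    // [0, 1210114968] are found between bytes 0 and 232 of _index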
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/part-0 b/hadoop-common-project/hadoop-common/src/test/resources/test.har/part-0
new file mode 100755
index 00000000000..e69de29bb2d