diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index b3a94f72f0f..37e78fc4ffe 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -132,6 +132,9 @@ Release 2.1.2 - UNRELEASED
     HADOOP-9761. ViewFileSystem#rename fails when using DistributedFileSystem.
     (Andrew Wang via Colin Patrick McCabe)
 
+    HADOOP-10003. HarFileSystem.listLocatedStatus() fails.
+    (Jason Dere and suresh via suresh)
+
 Release 2.1.1-beta - 2013-09-23
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index 5d243356100..b72c7ee8055 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -458,6 +458,10 @@
             <exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c</exclude>
             <exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc_encoder.h</exclude>
             <exclude>src/test/java/org/apache/hadoop/fs/test-untar.tgz</exclude>
+            <exclude>src/test/resources/test.har/_SUCCESS</exclude>
+            <exclude>src/test/resources/test.har/_index</exclude>
+            <exclude>src/test/resources/test.har/_masterindex</exclude>
+            <exclude>src/test/resources/test.har/part-0</exclude>
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
index 6f1dab0f8ac..459a574296f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
@@ -17,20 +17,6 @@
  */
 package org.apache.hadoop.fs;
 
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URLDecoder;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.HashMap;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -40,6 +26,14 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.util.LineReader;
 import org.apache.hadoop.util.Progressable;
 
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.util.*;
+
 /**
  * This is an implementation of the Hadoop Archive
  * Filesystem. This archive Filesystem has index files
@@ -53,7 +47,7 @@ import org.apache.hadoop.util.Progressable;
  * index for ranges of hashcodes.
  */
 
-public class HarFileSystem extends FilterFileSystem {
+public class HarFileSystem extends FileSystem {
 
   private static final Log LOG = LogFactory.getLog(HarFileSystem.class);
 
@@ -75,11 +69,13 @@ public class HarFileSystem extends FilterFileSystem {
   // pointer into the static metadata cache
   private HarMetaData metadata;
 
+  private FileSystem fs;
+
   /**
    * public construction of harfilesystem
-   *
    */
   public HarFileSystem() {
+    // Must call #initialize() method to set the underlying file system
   }
 
   /**
    * Constructor to create a HarFileSystem with an
    * underlying filesystem.
-   * @param fs
+   * @param fs underlying file system
    */
   public HarFileSystem(FileSystem fs) {
-    super(fs);
+    this.fs = fs;
+    this.statistics = fs.statistics;
   }
 
   private synchronized void initializeMetadataCache(Configuration conf) {
@@ -171,6 +168,11 @@
     }
   }
 
+  @Override
+  public Configuration getConf() {
+    return fs.getConf();
+  }
+
   // get the version of the filesystem from the masterindex file
   // the version is currently not useful since its the first version
   // of archives
@@ -236,8 +238,7 @@
       throw new IOException("query component in Path not supported " + rawURI);
     }
 
-    URI tmp = null;
-
+    URI tmp;
     try {
       // convert <scheme>-<host> to <scheme>://<host>
      URI baseUri = new URI(authority.replaceFirst("-", "://"));
@@ -256,7 +257,7 @@
     return URLDecoder.decode(str, "UTF-8");
   }
 
-  private String decodeFileName(String fname) 
+  private String decodeFileName(String fname)
     throws UnsupportedEncodingException {
     int version = metadata.getVersion();
     if (version == 2 || version == 3){
@@ -276,7 +277,7 @@
   /**
    * Create a har specific auth
    * har-underlyingfs:port
-   * @param underLyingURI the uri of underlying
+   * @param underLyingUri the uri of underlying
    * filesystem
    * @return har specific auth
    */
@@ -294,7 +295,12 @@
     }
     return auth;
   }
-
+
+  @Override
+  protected URI getCanonicalUri() {
+    return fs.canonicalizeUri(getUri());
+  }
+
   /**
    * Returns the uri of this filesystem.
    * The uri is of the form
@@ -419,7 +425,7 @@
   /**
    * Get block locations from the underlying fs and fix their
    * offsets and lengths.
-   * @param file the input filestatus to get block locations
+   * @param file the input file status to get block locations
    * @param start the start of the desired range in the contained file
   * @param len the length of the desired range
   * @return block locations for this segment of file
@@ -441,8 +447,7 @@
   }
 
   /**
-   * the hash of the path p inside iniside
-   * the filesystem
+   * the hash of the path p inside the filesystem
   * @param p the path in the harfilesystem
   * @return the hash code of the path.
   */
@@ -475,13 +480,9 @@
   *          the parent path directory
   * @param statuses
   *          the list to add the children filestatuses to
-   * @param children
-   *          the string list of children for this parent
-   * @param archiveIndexStat
-   *          the archive index filestatus
    */
-  private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
-      List<String> children) throws IOException {
+  private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses)
+      throws IOException {
     String parentString = parent.getName();
     if (!parentString.endsWith(Path.SEPARATOR)){
       parentString += Path.SEPARATOR;
@@ -547,7 +548,7 @@
   // stored in a single line in the index files
   // the format is of the form
   // filename "dir"/"file" partFileName startIndex length
-  // 
+  //
   private class HarStatus {
     boolean isDir;
     String name;
@@ -666,7 +667,6 @@
   public FSDataInputStream open(Path f, int bufferSize) throws IOException {
     // get the fs DataInputStream for the underlying file
     HarStatus hstatus = getFileHarStatus(f);
-    // we got it.. woo hooo!!!
     if (hstatus.isDir()) {
       throw new FileNotFoundException(f + " : not a file in " + archivePath);
     }
@@ -686,7 +686,12 @@
       Progressable progress) throws IOException {
     throw new IOException("Har: create not allowed.");
   }
-
+
+  @Override
+  public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
+    throw new IOException("Har: append not allowed.");
+  }
+
   @Override
   public void close() throws IOException {
     if (fs != null) {
@@ -704,9 +709,19 @@
    */
   @Override
   public boolean setReplication(Path src, short replication) throws IOException{
-    throw new IOException("Har: setreplication not allowed");
+    throw new IOException("Har: setReplication not allowed");
   }
-
+
+  @Override
+  public boolean rename(Path src, Path dst) throws IOException {
+    throw new IOException("Har: rename not allowed");
+  }
+
+  @Override
+  public FSDataOutputStream append(Path f) throws IOException {
+    throw new IOException("Har: append not allowed");
+  }
+
   /**
    * Not implemented.
    */
@@ -714,7 +729,7 @@
   public boolean delete(Path f, boolean recursive) throws IOException {
     throw new IOException("Har: delete not allowed");
   }
-
+
   /**
    * liststatus returns the children of a directory
    * after looking up the index files.
@@ -733,7 +748,7 @@
       throw new FileNotFoundException("File " + f + " not found in " + archivePath);
     }
     if (hstatus.isDir()) {
-      fileStatusesInIndex(hstatus, statuses, hstatus.children);
+      fileStatusesInIndex(hstatus, statuses);
     } else {
       statuses.add(toFileStatus(hstatus, null));
     }
@@ -748,7 +763,7 @@
   public Path getHomeDirectory() {
     return new Path(uri.toString());
   }
-
+
   @Override
   public void setWorkingDirectory(Path newDir) {
     //does nothing.
@@ -811,7 +826,7 @@
    * Not implemented.
    */
   @Override
-  public void setPermission(Path p, FsPermission permisssion)
+  public void setPermission(Path p, FsPermission permission)
     throws IOException {
     throw new IOException("Har: setPermission not allowed");
   }
@@ -900,7 +915,7 @@
           newlen = (int) (end - position);
         }
         // end case
-        if (newlen == 0) 
+        if (newlen == 0)
           return ret;
         ret = underLyingStream.read(b, offset, newlen);
         position += ret;
@@ -937,8 +952,8 @@
 
     @Override
     public boolean seekToNewSource(long targetPos) throws IOException {
-      //do not need to implement this
-      // hdfs in itself does seektonewsource 
+      // do not need to implement this
+      // hdfs in itself does seektonewsource
       // while reading.
       return false;
     }
 
@@ -974,14 +989,12 @@
     }
 
     @Override
-    public void setReadahead(Long readahead)
-        throws IOException, UnsupportedEncodingException {
+    public void setReadahead(Long readahead) throws IOException {
       underLyingStream.setReadahead(readahead);
     }
 
     @Override
-    public void setDropBehind(Boolean dropBehind)
-        throws IOException, UnsupportedEncodingException {
+    public void setDropBehind(Boolean dropBehind) throws IOException {
       underLyingStream.setDropBehind(dropBehind);
     }
   }
@@ -999,19 +1012,6 @@
         long length, int bufsize) throws IOException {
         super(new HarFsInputStream(fs, p, start, length, bufsize));
     }
-
-    /**
-     * constructor for har input stream.
-     * @param fs the underlying filesystem
-     * @param p the path in the underlying file system
-     * @param start the start position in the part file
-     * @param length the length of valid data in the part file.
-     * @throws IOException
-     */
-    public HarFSDataInputStream(FileSystem fs, Path p, long start, long length)
-        throws IOException {
-        super(new HarFsInputStream(fs, p, start, length, 0));
-    }
   }
 
   private class HarMetaData {
@@ -1058,7 +1058,7 @@
     }
 
     private void parseMetaData() throws IOException {
-      Text line;
+      Text line = new Text();
      long read;
      FSDataInputStream in = null;
      LineReader lin = null;
@@ -1068,7 +1068,6 @@
        FileStatus masterStat = fs.getFileStatus(masterIndexPath);
        masterIndexTimestamp = masterStat.getModificationTime();
        lin = new LineReader(in, getConf());
-        line = new Text();
        read = lin.readLine(line);
 
        // the first line contains the version of the index file
@@ -1082,7 +1081,7 @@
        }
 
        // each line contains a hashcode range and the index file name
-        String[] readStr = null;
+        String[] readStr;
        while(read < masterStat.getLen()) {
          int b = lin.readLine(line);
          read += b;
@@ -1094,6 +1093,9 @@
              endHash));
          line.clear();
        }
+      } catch (IOException ioe) {
+        LOG.warn("Encountered exception ", ioe);
+        throw ioe;
      } finally {
        IOUtils.cleanup(LOG, lin, in);
      }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
index 424257496ef..237d7161f72 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
@@ -18,14 +18,6 @@
 
 package org.apache.hadoop.fs;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertFalse;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.util.Shell;
@@ -34,6 +26,14 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.*;
+
 /**
  * This test class checks basic operations with {@link HarFileSystem} including
  * various initialization cases, getters, and modification methods.
@@ -69,7 +69,7 @@ public class TestHarFileSystemBasics {
   /*
    * creates and returns fully initialized HarFileSystem
    */
-  private HarFileSystem createHarFileSysten(final Configuration conf)
+  private HarFileSystem createHarFileSystem(final Configuration conf)
       throws Exception {
     localFileSystem = FileSystem.getLocal(conf);
     localFileSystem.initialize(new URI("file:///"), conf);
@@ -130,7 +130,7 @@ public class TestHarFileSystemBasics {
     }
     // create Har to test:
     conf = new Configuration();
-    harFileSystem = createHarFileSysten(conf);
+    harFileSystem = createHarFileSystem(conf);
   }
 
   @After
@@ -232,6 +232,32 @@ public class TestHarFileSystemBasics {
     assertTrue(p2.toUri().toString().startsWith("har://file-localhost/"));
   }
 
+  @Test
+  public void testListLocatedStatus() throws Exception {
+    String testHarPath = this.getClass().getResource("/test.har").getPath();
+    URI uri = new URI("har://" + testHarPath);
+    HarFileSystem hfs = new HarFileSystem(localFileSystem);
+    hfs.initialize(uri, new Configuration());
+
+    // test.har has the following contents:
+    //   dir1/1.txt
+    //   dir1/2.txt
+    Set<String> expectedFileNames = new HashSet<String>();
+    expectedFileNames.add("1.txt");
+    expectedFileNames.add("2.txt");
+
+    // List contents of dir, and ensure we find all expected files
+    Path path = new Path("dir1");
+    RemoteIterator<LocatedFileStatus> fileList = hfs.listLocatedStatus(path);
+    while (fileList.hasNext()) {
+      String fileName = fileList.next().getPath().getName();
+      assertTrue(fileName + " not in expected files list", expectedFileNames.contains(fileName));
+      expectedFileNames.remove(fileName);
+    }
+    assertEquals("Didn't find all of the expected file names: " + expectedFileNames,
+        0, expectedFileNames.size());
+  }
+
   // ========== Negative:
 
   @Test
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/.part-0.crc b/hadoop-common-project/hadoop-common/src/test/resources/test.har/.part-0.crc
new file mode 100644
index 00000000000..3b7b044936a
Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/resources/test.har/.part-0.crc differ
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/_SUCCESS b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_SUCCESS
new file mode 100755
index 00000000000..e69de29bb2d
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/_index b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_index
new file mode 100755
index 00000000000..0360e5beabc
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_index
@@ -0,0 +1,4 @@
+%2F dir 1380270822000+511+root+wheel 0 0 dir1
+%2Fdir1 dir 1380270441000+493+jdere+wheel 0 0 1.txt 2.txt
+%2Fdir1%2F1.txt file part-0 0 0 1380270439000+420+jdere+wheel
+%2Fdir1%2F2.txt file part-0 0 0 1380270441000+420+jdere+wheel
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/_masterindex b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_masterindex
new file mode 100755
index 00000000000..c028f4e65a7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/resources/test.har/_masterindex
@@ -0,0 +1,2 @@
+3
+0 1210114968 0 232
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/test.har/part-0 b/hadoop-common-project/hadoop-common/src/test/resources/test.har/part-0
new file mode 100755
index 00000000000..e69de29bb2d
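Note (commentary, not part of the patch): the heart of this change is that HarFileSystem now extends FileSystem directly instead of FilterFileSystem, so inherited operations such as listLocatedStatus() resolve through HarFileSystem's own index-backed listStatus() rather than being forwarded verbatim to the underlying filesystem, which cannot interpret paths inside an archive. Below is a minimal client-side sketch in Java mirroring the new testListLocatedStatus() test; the archive location /tmp/test.har and the class name are assumptions for illustration only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import java.net.URI;

public class HarListingSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical archive location. "file-localhost" folds the underlying
    // filesystem's scheme and host into the har URI authority; the code in
    // this patch converts it back with authority.replaceFirst("-", "://").
    URI harUri = new URI("har://file-localhost/tmp/test.har");
    FileSystem harFs = FileSystem.get(harUri, new Configuration());

    // listLocatedStatus() is inherited from FileSystem. Before this patch,
    // the FilterFileSystem version handed the har path straight to the
    // underlying filesystem, which fails for paths inside the archive.
    RemoteIterator<LocatedFileStatus> files =
        harFs.listLocatedStatus(new Path("/dir1"));
    while (files.hasNext()) {
      System.out.println(files.next().getPath());
    }
  }
}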